def generate_codeobj_source(self, writer):
    """Write one `.cpp`/`.h` pair per code object via *writer*.

    First collects, per code object, the C++ definition lines for
    non-constant values (attribute accessors, dynamic-array pointers and
    sizes), then substitutes them for the ``%CONSTANTS%`` placeholder in
    each code object's generated C++ source.
    """
    # Generate data for non-constant values
    code_object_defs = defaultdict(list)
    for codeobj in self.code_objects.itervalues():
        lines = []
        for k, v in codeobj.variables.iteritems():
            if isinstance(v, AttributeVariable):
                # We assume all attributes are implemented as property-like methods
                line = 'const {c_type} {varname} = {objname}.{attrname}();'
                lines.append(line.format(c_type=c_data_type(v.dtype),
                                         varname=k,
                                         objname=v.obj.name,
                                         attrname=v.attribute))
            elif isinstance(v, ArrayVariable):
                # v.size may raise TypeError for variables without a fixed
                # size -- those simply get no definition line.
                try:
                    if isinstance(v, DynamicArrayVariable):
                        if v.dimensions == 1:
                            dyn_array_name = self.dynamic_arrays[v]
                            array_name = self.arrays[v]
                            # Guard against empty vectors: &vec[0] of an
                            # empty std::vector is undefined behaviour.
                            line = '{c_type}* const {array_name} = {dyn_array_name}.empty()? 0 : &{dyn_array_name}[0];'
                            line = line.format(c_type=c_data_type(v.dtype),
                                               array_name=array_name,
                                               dyn_array_name=dyn_array_name)
                            lines.append(line)
                            line = 'const int _num{k} = {dyn_array_name}.size();'
                            line = line.format(k=k,
                                               dyn_array_name=dyn_array_name)
                            lines.append(line)
                    else:
                        # Fixed-size arrays: the size is known at
                        # code-generation time.
                        lines.append('const int _num%s = %s;' % (k, v.size))
                except TypeError:
                    pass
        for line in lines:
            # Sometimes an array is referred to by two different keys in our
            # dictionary -- make sure to never add a line twice
            if not line in code_object_defs[codeobj.name]:
                code_object_defs[codeobj.name].append(line)
    # Generate the code objects
    for codeobj in self.code_objects.itervalues():
        ns = codeobj.variables
        # TODO: fix these freeze/CONSTANTS hacks somehow - they work but not elegant.
        code = freeze(codeobj.code.cpp_file, ns)
        code = code.replace('%CONSTANTS%',
                            '\n'.join(code_object_defs[codeobj.name]))
        code = '#include "objects.h"\n' + code
        writer.write('code_objects/' + codeobj.name + '.cpp', code)
        writer.write('code_objects/' + codeobj.name + '.h',
                     codeobj.code.h_file)
def _add_user_function(self, varname, variable):
    """Collect the C++ support code for a user-defined function.

    Recursively processes the function's implementation (for this code
    object class) and its dependencies, returning a 4-tuple of
    ``(hash_defines, pointers, support_code, user_functions)`` lists, with
    dependency code placed before the function's own code.
    """
    impl = variable.implementations[self.codeobj_class]
    support_code = []
    hash_defines = []
    pointers = []
    user_functions = [(varname, variable)]
    funccode = impl.get_code(self.owner)
    if isinstance(funccode, str):
        # Rename references to any dependencies if necessary
        for dep_name, dep in iteritems(impl.dependencies):
            dep_impl = dep.implementations[self.codeobj_class]
            dep_impl_name = dep_impl.name
            if dep_impl_name is None:
                # Fall back to the Python function's name when the
                # implementation does not declare one.
                dep_impl_name = dep.pyfunc.__name__
            if dep_name != dep_impl_name:
                funccode = word_substitute(funccode,
                                           {dep_name: dep_impl_name})
        # Normalize plain-string code to the dict form used below.
        funccode = {'support_code': funccode}
    if funccode is not None:
        # To make namespace variables available to functions, we
        # create global variables and assign to them in the main
        # code
        func_namespace = impl.get_namespace(self.owner) or {}
        for ns_key, ns_value in iteritems(func_namespace):
            if hasattr(ns_value, 'dtype'):
                if ns_value.shape == ():
                    raise NotImplementedError((
                        'Directly replace scalar values in the function '
                        'instead of providing them via the namespace'))
                type_str = c_data_type(ns_value.dtype) + '*'
            else:  # e.g. a function
                type_str = 'py::object'
            support_code.append('static {0} _namespace{1};'.format(type_str,
                                                                   ns_key))
            pointers.append('_namespace{0} = {1};'.format(ns_key, ns_key))
        support_code.append(deindent(funccode.get('support_code', '')))
        hash_defines.append(deindent(funccode.get('hashdefine_code', '')))
    dep_hash_defines = []
    dep_pointers = []
    dep_support_code = []
    if impl.dependencies is not None:
        for dep_name, dep in iteritems(impl.dependencies):
            if dep_name not in self.variables:
                # do not add a dependency twice
                self.variables[dep_name] = dep
                dep_impl = dep.implementations[self.codeobj_class]
                if dep_name != dep_impl.name:
                    self.func_name_replacements[dep_name] = dep_impl.name
                # Recurse so that transitive dependencies are handled too.
                hd, ps, sc, uf = self._add_user_function(dep_name, dep)
                dep_hash_defines.extend(hd)
                dep_pointers.extend(ps)
                dep_support_code.extend(sc)
                user_functions.extend(uf)
    # Dependencies come first so their definitions precede any use.
    return (dep_hash_defines + hash_defines,
            dep_pointers + pointers,
            dep_support_code + support_code,
            user_functions)
def generate_codeobj_source(self, writer):
    """Write one `.cpp`/`.h` pair per code object via *writer*.

    Collects, per code object, the C++ definition lines for non-constant
    values (attribute accessors, dynamic-array pointers and sizes), then
    substitutes them for the ``%CONSTANTS%`` placeholder in each code
    object's generated C++ source.
    """
    # Generate data for non-constant values
    code_object_defs = defaultdict(list)
    for codeobj in self.code_objects.itervalues():
        lines = []
        for k, v in codeobj.variables.iteritems():
            if isinstance(v, AttributeVariable):
                # We assume all attributes are implemented as property-like methods
                line = 'const {c_type} {varname} = {objname}.{attrname}();'
                lines.append(line.format(c_type=c_data_type(v.dtype),
                                         varname=k,
                                         objname=v.obj.name,
                                         attrname=v.attribute))
            elif isinstance(v, ArrayVariable):
                # v.size may raise TypeError for variables without a fixed
                # size -- those simply get no definition line.
                try:
                    if isinstance(v, DynamicArrayVariable):
                        if v.dimensions == 1:
                            dyn_array_name = self.dynamic_arrays[v]
                            array_name = self.arrays[v]
                            # BUGFIX: guard against empty vectors -- taking
                            # &vec[0] of an empty std::vector is undefined
                            # behaviour in C++. The other variants of this
                            # method already emit the .empty() guard; this
                            # one lacked it.
                            line = '{c_type}* const {array_name} = {dyn_array_name}.empty()? 0 : &{dyn_array_name}[0];'
                            line = line.format(c_type=c_data_type(v.dtype),
                                               array_name=array_name,
                                               dyn_array_name=dyn_array_name)
                            lines.append(line)
                            line = 'const int _num{k} = {dyn_array_name}.size();'
                            line = line.format(k=k,
                                               dyn_array_name=dyn_array_name)
                            lines.append(line)
                    else:
                        # Fixed-size arrays: size is known at generation time.
                        lines.append('const int _num%s = %s;' % (k, v.size))
                except TypeError:
                    pass
        for line in lines:
            # Sometimes an array is referred to by two different keys in our
            # dictionary -- make sure to never add a line twice
            if not line in code_object_defs[codeobj.name]:
                code_object_defs[codeobj.name].append(line)
    # Generate the code objects
    for codeobj in self.code_objects.itervalues():
        ns = codeobj.variables
        # TODO: fix these freeze/CONSTANTS hacks somehow - they work but not elegant.
        code = freeze(codeobj.code.cpp_file, ns)
        code = code.replace('%CONSTANTS%',
                            '\n'.join(code_object_defs[codeobj.name]))
        code = '#include "objects.h"\n' + code
        writer.write('code_objects/' + codeobj.name + '.cpp', code)
        writer.write('code_objects/' + codeobj.name + '.h',
                     codeobj.code.h_file)
def _add_user_function(self, varname, variable):
    """Collect the C++ support code for a user-defined function.

    Recursively processes the function's implementation (for this code
    object class) and its dependencies, returning a 4-tuple of
    ``(hash_defines, pointers, support_code, user_functions)`` lists, with
    dependency code placed before the function's own code.
    """
    impl = variable.implementations[self.codeobj_class]
    support_code = []
    hash_defines = []
    pointers = []
    user_functions = [(varname, variable)]
    funccode = impl.get_code(self.owner)
    if isinstance(funccode, basestring):
        # Normalize plain-string code to the dict form used below.
        funccode = {'support_code': funccode}
    if funccode is not None:
        # To make namespace variables available to functions, we
        # create global variables and assign to them in the main
        # code
        func_namespace = impl.get_namespace(self.owner) or {}
        for ns_key, ns_value in func_namespace.iteritems():
            if hasattr(ns_value, 'dtype'):
                if ns_value.shape == ():
                    raise NotImplementedError((
                        'Directly replace scalar values in the function '
                        'instead of providing them via the namespace'))
                type_str = c_data_type(ns_value.dtype) + '*'
            else:  # e.g. a function
                type_str = 'py::object'
            support_code.append('static {0} _namespace{1};'.format(type_str,
                                                                   ns_key))
            pointers.append('_namespace{0} = {1};'.format(ns_key, ns_key))
        support_code.append(deindent(funccode.get('support_code', '')))
        hash_defines.append(deindent(funccode.get('hashdefine_code', '')))
    dep_hash_defines = []
    dep_pointers = []
    dep_support_code = []
    if impl.dependencies is not None:
        for dep_name, dep in impl.dependencies.iteritems():
            # NOTE(review): unlike the other _add_user_function variant in
            # this file, there is no "already added" guard here, so a
            # dependency reachable via several functions is processed (and
            # overwritten in self.variables) each time -- confirm intended.
            self.variables[dep_name] = dep
            hd, ps, sc, uf = self._add_user_function(dep_name, dep)
            dep_hash_defines.extend(hd)
            dep_pointers.extend(ps)
            dep_support_code.extend(sc)
            user_functions.extend(uf)
    # Dependencies come first so their definitions precede any use.
    return (dep_hash_defines + hash_defines,
            dep_pointers + pointers,
            dep_support_code + support_code,
            user_functions)
def write_static_arrays(self, directory):
    """Dump all numpy arrays found in code-object namespaces as binary
    files under ``<directory>/static_arrays`` and record their specs.

    Find np arrays in the namespaces and convert them into static arrays.
    Hopefully they are correctly used in the code: For example, this works
    for the namespaces for functions with C++ (e.g. TimedArray treats it
    as a C array) but does not work in places that are implicitly
    vectorized (state updaters, resets, etc.). But arrays shouldn't be
    used there anyway.
    """
    for codeobj in self.code_objects.itervalues():
        for varname, varvalue in codeobj.variables.iteritems():
            if isinstance(varvalue, np.ndarray):
                self.static_arrays[varname] = varvalue
    logger.debug("static arrays: " + str(sorted(self.static_arrays.keys())))
    # Write each array to disk and remember (name, C type, size, name)
    # for the code-generation templates; sorted for deterministic output.
    specs = []
    for arr_name, arr in sorted(self.static_arrays.items()):
        arr.tofile(os.path.join(directory, 'static_arrays', arr_name))
        specs.append((arr_name, c_data_type(arr.dtype), arr.size, arr_name))
    self.static_array_specs = specs
def generate_codeobj_source(self, writer):
    """Write one `.cpp`/`.h` pair per code object via *writer*,
    substituting per-object definition lines for ``%CONSTANTS%``.
    """
    # First pass: collect the C++ definition lines for non-constant
    # values, one list per code object.
    code_object_defs = defaultdict(list)
    for codeobj in self.code_objects.itervalues():
        pending = []
        for varname, var in codeobj.variables.iteritems():
            if not isinstance(var, ArrayVariable):
                continue
            # var.size may raise TypeError for variables without a fixed
            # size -- those get no definition line.
            try:
                if isinstance(var, DynamicArrayVariable):
                    if var.dimensions == 1:
                        dyn_name = self.dynamic_arrays[var]
                        arr_name = self.arrays[var]
                        pending.append(
                            "{c_type}* const {array_name} = {dyn_array_name}.empty()? 0 : &{dyn_array_name}[0];".format(
                                c_type=c_data_type(var.dtype),
                                array_name=arr_name,
                                dyn_array_name=dyn_name))
                        pending.append(
                            "const int _num{k} = {dyn_array_name}.size();".format(
                                k=varname, dyn_array_name=dyn_name))
                else:
                    pending.append("const int _num%s = %s;" % (varname,
                                                               var.size))
            except TypeError:
                pass
        # The same array can be reachable through several variable names;
        # deduplicate so each definition line appears exactly once.
        for candidate in pending:
            if candidate not in code_object_defs[codeobj.name]:
                code_object_defs[codeobj.name].append(candidate)
    # Second pass: emit the source files.
    for codeobj in self.code_objects.itervalues():
        # TODO: fix these freeze/CONSTANTS hacks somehow - they work but not elegant.
        code = self.freeze(codeobj.code.cpp_file, codeobj.variables)
        code = code.replace("%CONSTANTS%",
                            "\n".join(code_object_defs[codeobj.name]))
        writer.write("code_objects/" + codeobj.name + ".cpp",
                     '#include "objects.h"\n' + code)
        writer.write("code_objects/" + codeobj.name + ".h",
                     codeobj.code.h_file)
def write_static_arrays(self, directory):
    """Persist every plain numpy array used by a code object.

    Find np arrays in the namespaces and convert them into static arrays.
    Hopefully they are correctly used in the code: For example, this works
    for the namespaces for functions with C++ (e.g. TimedArray treats it
    as a C array) but does not work in places that are implicitly
    vectorized (state updaters, resets, etc.). But arrays shouldn't be
    used there anyway.
    """
    # Harvest raw numpy arrays from every code object's namespace.
    for code_obj in self.code_objects.itervalues():
        for key, val in code_obj.variables.iteritems():
            if isinstance(val, np.ndarray):
                self.static_arrays[key] = val
    logger.debug("static arrays: " + str(sorted(self.static_arrays.keys())))
    target_dir = os.path.join(directory, 'static_arrays')
    # Build the spec list locally and assign once at the end, sorted by
    # name so generated code is deterministic.
    static_array_specs = []
    for name, arr in sorted(self.static_arrays.items()):
        arr.tofile(os.path.join(target_dir, name))
        static_array_specs.append(
            (name, c_data_type(arr.dtype), arr.size, name))
    self.static_array_specs = static_array_specs
def generate_codeobj_source(self, writer):
    """Write one `.cu`/`.h` pair per code object via *writer* (CUDA
    standalone backend).

    Rewrites ``_rand``/``_randn`` calls so each call site gets its own
    offset into a pre-generated random-number buffer, then fills the
    ``%CONSTANTS%``, ``%HOST_PARAMETERS%``, ``%DEVICE_PARAMETERS%``,
    ``%KERNEL_VARIABLES%`` and ``%CODEOBJ_NAME%`` template placeholders.
    """
    # check how many random numbers are needed per step
    for code_object in self.code_objects.itervalues():
        num_occurences_rand = code_object.code.cu_file.count("_rand(")
        num_occurences_randn = code_object.code.cu_file.count("_randn(")
        if num_occurences_rand > 0:
            if code_object.template_name != "synapses_create":
                # first one is always the definition, so subtract 1
                code_object.rand_calls = num_occurences_rand - 1
                for i in range(0, code_object.rand_calls):
                    # Give the i-th call site a distinct index offset; use
                    # the concrete N when known, the _N placeholder otherwise.
                    if code_object.owner.N != 0:
                        code_object.code.cu_file = code_object.code.cu_file.replace(
                            "_rand(_vectorisation_idx)",
                            "_rand(_vectorisation_idx + " + str(i) + " * " + str(code_object.owner.N) + ")",
                            1)
                    else:
                        code_object.code.cu_file = code_object.code.cu_file.replace(
                            "_rand(_vectorisation_idx)",
                            "_rand(_vectorisation_idx + " + str(i) + " * _N)",
                            1)
            else:
                # synapses_create runs on the host -> plain C rand().
                code_object.code.cu_file = code_object.code.cu_file.replace(
                    "_rand(_vectorisation_idx)",
                    "(rand()/(float)RAND_MAX)")
        if num_occurences_randn > 0 and code_object.template_name != "synapses_create":
            # first one is always the definition, so subtract 1
            code_object.randn_calls = num_occurences_randn - 1
            for i in range(0, code_object.randn_calls):
                if code_object.owner.N != 0:
                    code_object.code.cu_file = code_object.code.cu_file.replace(
                        "_randn(_vectorisation_idx)",
                        "_randn(_vectorisation_idx + " + str(i) + " * " + str(code_object.owner.N) + ")",
                        1)
                else:
                    code_object.code.cu_file = code_object.code.cu_file.replace(
                        "_randn(_vectorisation_idx)",
                        "_randn(_vectorisation_idx + " + str(i) + " * _N)",
                        1)
    code_object_defs = defaultdict(list)
    host_parameters = defaultdict(list)
    device_parameters = defaultdict(list)
    kernel_variables = defaultdict(list)
    # Generate data for non-constant values
    for codeobj in self.code_objects.itervalues():
        code_object_defs_lines = []
        host_parameters_lines = []
        device_parameters_lines = []
        kernel_variables_lines = []
        additional_code = []
        number_elements = ""
        if hasattr(codeobj, 'owner') and hasattr(codeobj.owner, '_N') and codeobj.owner._N != 0:
            number_elements = str(codeobj.owner._N)
        else:
            number_elements = "_N"
        for k, v in codeobj.variables.iteritems():
            # code objects which only run once
            if k == "_python_randn" and codeobj.runs_every_tick == False and codeobj.template_name != "synapses_create":
                # NOTE(review): the generator here is named 'uniform_gen'
                # but produces *normal* numbers (and vice versa below) --
                # names look swapped; behaviour is unaffected. Confirm.
                additional_code.append('''
                    //genenerate an array of random numbers on the device
                    float* dev_array_randn;
                    cudaMalloc((void**)&dev_array_randn, sizeof(float)*''' + number_elements + ''' * ''' + str(codeobj.randn_calls) + ''');
                    if(!dev_array_randn)
                    {
                        printf("ERROR while allocating device memory with size %ld\\n", sizeof(float)*''' + number_elements + '''*''' + str(codeobj.randn_calls) + ''');
                    }
                    curandGenerator_t uniform_gen;
                    curandCreateGenerator(&uniform_gen, CURAND_RNG_PSEUDO_DEFAULT);
                    curandSetPseudoRandomGeneratorSeed(uniform_gen, time(0));
                    curandGenerateNormal(uniform_gen, dev_array_randn, ''' + number_elements + '''*''' + str(codeobj.randn_calls) + ''', 0, 1);''')
                line = "float* _array_{name}_randn".format(name=codeobj.name)
                device_parameters_lines.append(line)
                host_parameters_lines.append("dev_array_randn")
            elif k == "_python_rand" and codeobj.runs_every_tick == False and codeobj.template_name != "synapses_create":
                additional_code.append('''
                    //genenerate an array of random numbers on the device
                    float* dev_array_rand;
                    cudaMalloc((void**)&dev_array_rand, sizeof(float)*''' + number_elements + '''*''' + str(codeobj.rand_calls) + ''');
                    if(!dev_array_rand)
                    {
                        printf("ERROR while allocating device memory with size %ld\\n", sizeof(float)*''' + number_elements + '''*''' + str(codeobj.rand_calls) + ''');
                    }
                    curandGenerator_t normal_gen;
                    curandCreateGenerator(&normal_gen, CURAND_RNG_PSEUDO_DEFAULT);
                    curandSetPseudoRandomGeneratorSeed(normal_gen, time(0));
                    curandGenerateUniform(normal_gen, dev_array_rand, ''' + number_elements + '''*''' + str(codeobj.rand_calls) + ''');''')
                line = "float* _array_{name}_rand".format(name=codeobj.name)
                device_parameters_lines.append(line)
                host_parameters_lines.append("dev_array_rand")
            elif isinstance(v, ArrayVariable):
                # v.size may raise TypeError for variables without a fixed
                # size -- those get no definition lines.
                try:
                    if isinstance(v, DynamicArrayVariable):
                        if v.dimensions == 1:
                            dyn_array_name = self.dynamic_arrays[v]
                            array_name = self.arrays[v]
                            # Host-side: raw device pointer + current size.
                            line = '{c_type}* const {array_name} = thrust::raw_pointer_cast(&dev{dyn_array_name}[0]);'
                            line = line.format(c_type=c_data_type(v.dtype),
                                               array_name=array_name,
                                               dyn_array_name=dyn_array_name)
                            code_object_defs_lines.append(line)
                            line = 'const int _num{k} = dev{dyn_array_name}.size();'
                            line = line.format(k=k,
                                               dyn_array_name=dyn_array_name)
                            code_object_defs_lines.append(line)
                            host_parameters_lines.append(array_name)
                            host_parameters_lines.append("_num" + k)
                            # Kernel parameter list and in-kernel aliases.
                            line = "{c_type}* par_{array_name}"
                            device_parameters_lines.append(
                                line.format(c_type=c_data_type(v.dtype),
                                            array_name=array_name))
                            line = "int par_num_{array_name}"
                            device_parameters_lines.append(
                                line.format(array_name=k))
                            line = "{c_type}* _ptr{array_name} = par_{array_name};"
                            kernel_variables_lines.append(
                                line.format(c_type=c_data_type(v.dtype),
                                            array_name=array_name))
                            line = "const int _num{array_name} = par_num_{array_name};"
                            kernel_variables_lines.append(
                                line.format(array_name=k))
                    else:
                        # Fixed-size arrays: pass the device pointer and the
                        # compile-time size.
                        host_parameters_lines.append("dev" + self.get_array_name(v))
                        device_parameters_lines.append(
                            "%s* par_%s" % (c_data_type(v.dtype),
                                            self.get_array_name(v)))
                        kernel_variables_lines.append(
                            "%s* _ptr%s = par_%s;" % (c_data_type(v.dtype),
                                                      self.get_array_name(v),
                                                      self.get_array_name(v)))
                        code_object_defs_lines.append(
                            'const int _num%s = %s;' % (k, v.size))
                        kernel_variables_lines.append(
                            'const int _num%s = %s;' % (k, v.size))
                except TypeError:
                    pass
        # Sometimes an array is referred to by two different keys in our
        # dictionary -- make sure to never add a line twice
        for line in code_object_defs_lines:
            if not line in code_object_defs[codeobj.name]:
                code_object_defs[codeobj.name].append(line)
        for line in host_parameters_lines:
            if not line in host_parameters[codeobj.name]:
                host_parameters[codeobj.name].append(line)
        for line in device_parameters_lines:
            if not line in device_parameters[codeobj.name]:
                device_parameters[codeobj.name].append(line)
        for line in kernel_variables_lines:
            if not line in kernel_variables[codeobj.name]:
                kernel_variables[codeobj.name].append(line)
        for line in additional_code:
            if not line in code_object_defs[codeobj.name]:
                code_object_defs[codeobj.name].append(line)
    # Generate the code objects
    for codeobj in self.code_objects.itervalues():
        ns = codeobj.variables
        # TODO: fix these freeze/CONSTANTS hacks somehow - they work but not elegant.
        code = self.freeze(codeobj.code.cu_file, ns)
        if len(host_parameters[codeobj.name]) == 0:
            # Kernels cannot have an empty parameter list -> pad with a dummy.
            host_parameters[codeobj.name].append("0")
            device_parameters[codeobj.name].append("int dummy")
        code = code.replace('%CONSTANTS%', '\n\t\t'.join(code_object_defs[codeobj.name]))
        code = code.replace('%HOST_PARAMETERS%', ',\n\t\t\t'.join(host_parameters[codeobj.name]))
        code = code.replace('%DEVICE_PARAMETERS%', ',\n\t'.join(device_parameters[codeobj.name]))
        code = code.replace('%KERNEL_VARIABLES%', '\n\t'.join(kernel_variables[codeobj.name]))
        code = code.replace('%CODEOBJ_NAME%', codeobj.name)
        code = '#include "objects.h"\n' + code
        writer.write('code_objects/' + codeobj.name + '.cu', code)
        writer.write('code_objects/' + codeobj.name + '.h', codeobj.code.h_file)
def atomics_parallelisation(self, statement, conditional_write_vars,
                            used_variables):
    """Translate *statement* into CUDA code using atomic operations.

    Returns a list of code lines, or raises ``ParallelisationError`` when
    the statement cannot be safely parallelised with atomics (atomics
    disabled, read-after-write hazard, or an unsupported in-place
    operator). ``used_variables`` is updated in place with non-``_idx``
    indexed variables read by the expression.
    """
    if not self._use_atomics:
        raise ParallelisationError()
    # Avoids circular import
    from brian2.devices.device import device
    # See https://github.com/brian-team/brian2/pull/531 for explanation
    used = set(get_identifiers(statement.expr))
    # Only variables indexed by something other than '_idx' can create
    # cross-thread hazards.
    used = used.intersection(k for k in self.variables.keys()
                             if k in self.variable_indices
                             and self.variable_indices[k] != '_idx')
    used_variables.update(used)
    if statement.var in used_variables:
        # Writing a variable that another thread may read -> not safe.
        raise ParallelisationError()
    expr = self.translate_expression(statement.expr)
    if statement.op == ':=' or self.variable_indices[statement.var] == '_idx' or not statement.inplace:
        # Plain (non-atomic) assignment is sufficient here.
        if statement.op == ':=':
            decl = self.c_data_type(statement.dtype) + ' '
            op = '='
            if statement.constant:
                decl = 'const ' + decl
        else:
            decl = ''
            op = statement.op
        line = '{decl}{var} {op} {expr};'.format(decl=decl,
                                                 var=statement.var,
                                                 op=op, expr=expr)
        line = [line]
    elif statement.inplace:
        sign = ''
        if statement.op == '+=':
            atomic_op = '_brian_atomicAdd'
        elif statement.op == '-=':
            # CUDA has hardware implementations for float (and for CC>=6.0
            # for double) only for atomicAdd, which is faster then our
            # software implementation
            atomic_op = '_brian_atomicAdd'
            sign = '-'
        elif statement.op == '*=':
            atomic_op = '_brian_atomicMul'
        elif statement.op == '/=':
            atomic_op = '_brian_atomicDiv'
        else:
            # TODO: what other inplace operations are possible? Can we
            # implement them with atomicCAS ?
            logger.info("Atomic operation for operation {op} is not implemented."
                        "".format(op=statement.op))
            raise ParallelisationError()
        line = '{atomic_op}(&{array_name}[{idx}], ({array_dtype}){sign}({expr}));'.format(
            atomic_op=atomic_op,
            array_name=self.get_array_name(self.variables[statement.var]),
            idx=self.variable_indices[statement.var],
            array_dtype=c_data_type(self.variables[statement.var].dtype),
            sign=sign, expr=expr)
        # this is now a list of 1 or 2 lines (potentially with if(...))
        line = self.conditional_write(line, statement,
                                      conditional_write_vars)
    else:
        raise ParallelisationError()
    if len(statement.comment):
        # Carry the original statement's comment over into the C code.
        line[-1] += ' // ' + statement.comment
    return line
def build(self, project_dir='output', compile_project=True,
          run_project=False, debug=True, with_output=True, native=True,
          additional_source_files=None, additional_header_files=None,
          main_includes=None, run_includes=None, run_args=None):
    '''
    Build the project

    TODO: more details

    Parameters
    ----------
    project_dir : str
        The output directory to write the project to, any existing files
        will be overwritten.
    compile_project : bool
        Whether or not to attempt to compile the project using GNU make.
    run_project : bool
        Whether or not to attempt to run the built project if it
        successfully builds.
    debug : bool
        Whether to compile in debug mode.
    with_output : bool
        Whether or not to show the ``stdout`` of the built program when run.
    native : bool
        Whether or not to compile natively using the ``--march=native``
        gcc option.
    additional_source_files : list of str
        A list of additional ``.cpp`` files to include in the build.
    additional_header_files : list of str
        A list of additional ``.h`` files to include in the build.
    main_includes : list of str
        A list of additional header files to include in ``main.cpp``.
    run_includes : list of str
        A list of additional header files to include in ``run.cpp``.
    run_args : list of str
        Additional command-line arguments passed to the compiled binary.
    '''
    # Mutable defaults are created per call, never shared.
    if additional_source_files is None:
        additional_source_files = []
    if additional_header_files is None:
        additional_header_files = []
    if main_includes is None:
        main_includes = []
    if run_includes is None:
        run_includes = []
    if run_args is None:
        run_args = []
    self.project_dir = project_dir
    ensure_directory(project_dir)
    for d in ['code_objects', 'results', 'static_arrays']:
        ensure_directory(os.path.join(project_dir, d))
    writer = CPPWriter(project_dir)
    logger.debug("Writing C++ standalone project to directory " + os.path.normpath(project_dir))
    # Sort by variable name for deterministic template output.
    arange_arrays = sorted([(var, start)
                            for var, start in self.arange_arrays.iteritems()],
                           key=lambda (var, start): var.name)
    # Find np arrays in the namespaces and convert them into static
    # arrays. Hopefully they are correctly used in the code: For example,
    # this works for the namespaces for functions with C++ (e.g. TimedArray
    # treats it as a C array) but does not work in places that are
    # implicitly vectorized (state updaters, resets, etc.). But arrays
    # shouldn't be used there anyway.
    for code_object in self.code_objects.itervalues():
        for name, value in code_object.variables.iteritems():
            if isinstance(value, np.ndarray):
                self.static_arrays[name] = value
    # write the static arrays
    logger.debug("static arrays: " + str(sorted(self.static_arrays.keys())))
    static_array_specs = []
    for name, arr in sorted(self.static_arrays.items()):
        arr.tofile(os.path.join(project_dir, 'static_arrays', name))
        static_array_specs.append((name, c_data_type(arr.dtype), arr.size, name))
    # Write the global objects
    networks = [net() for net in Network.__instances__()
                if net().name != '_fake_network']
    synapses = [S() for S in Synapses.__instances__()]
    arr_tmp = CPPStandaloneCodeObject.templater.objects(
        None, None,
        array_specs=self.arrays,
        dynamic_array_specs=self.dynamic_arrays,
        dynamic_array_2d_specs=self.dynamic_arrays_2d,
        zero_arrays=self.zero_arrays,
        arange_arrays=arange_arrays,
        synapses=synapses,
        clocks=self.clocks,
        static_array_specs=static_array_specs,
        networks=networks)
    writer.write('objects.*', arr_tmp)
    # Replay the queued main-loop actions into C++ main() lines.
    # `procedures` is a stack: start_run_func/end_run_func push/pop the
    # list that `main_lines` currently aliases.
    main_lines = []
    procedures = [('', main_lines)]
    runfuncs = {}
    for func, args in self.main_queue:
        if func == 'run_code_object':
            codeobj, = args
            main_lines.append('_run_%s();' % codeobj.name)
        elif func == 'run_network':
            net, netcode = args
            main_lines.extend(netcode)
        elif func == 'set_by_array':
            arrayname, staticarrayname = args
            code = '''
            for(int i=0; i<_num_{staticarrayname}; i++)
            {{
                {arrayname}[i] = {staticarrayname}[i];
            }}
            '''.format(arrayname=arrayname,
                       staticarrayname=staticarrayname)
            main_lines.extend(code.split('\n'))
        elif func == 'set_array_by_array':
            arrayname, staticarrayname_index, staticarrayname_value = args
            code = '''
            for(int i=0; i<_num_{staticarrayname_index}; i++)
            {{
                {arrayname}[{staticarrayname_index}[i]] = {staticarrayname_value}[i];
            }}
            '''.format(arrayname=arrayname,
                       staticarrayname_index=staticarrayname_index,
                       staticarrayname_value=staticarrayname_value)
            main_lines.extend(code.split('\n'))
        elif func == 'insert_code':
            main_lines.append(args)
        elif func == 'start_run_func':
            name, include_in_parent = args
            if include_in_parent:
                main_lines.append('%s();' % name)
            main_lines = []
            procedures.append((name, main_lines))
        elif func == 'end_run_func':
            name, include_in_parent = args
            name, main_lines = procedures.pop(-1)
            runfuncs[name] = main_lines
            # Resume appending to the enclosing procedure's lines.
            name, main_lines = procedures[-1]
        else:
            raise NotImplementedError("Unknown main queue function type " + func)
    # generate the finalisations
    for codeobj in self.code_objects.itervalues():
        if hasattr(codeobj.code, 'main_finalise'):
            main_lines.append(codeobj.code.main_finalise)
    # Generate data for non-constant values
    code_object_defs = defaultdict(list)
    for codeobj in self.code_objects.itervalues():
        lines = []
        for k, v in codeobj.variables.iteritems():
            if isinstance(v, AttributeVariable):
                # We assume all attributes are implemented as property-like methods
                line = 'const {c_type} {varname} = {objname}.{attrname}();'
                lines.append(line.format(c_type=c_data_type(v.dtype),
                                         varname=k,
                                         objname=v.obj.name,
                                         attrname=v.attribute))
            elif isinstance(v, ArrayVariable):
                # v.size may raise TypeError for variables without a fixed
                # size -- those get no definition line.
                try:
                    if isinstance(v, DynamicArrayVariable):
                        if v.dimensions == 1:
                            dyn_array_name = self.dynamic_arrays[v]
                            array_name = self.arrays[v]
                            # NOTE(review): &vec[0] is taken without an
                            # .empty() guard here, unlike the standalone
                            # generate_codeobj_source variants -- confirm.
                            line = '{c_type}* const {array_name} = &{dyn_array_name}[0];'
                            line = line.format(c_type=c_data_type(v.dtype),
                                               array_name=array_name,
                                               dyn_array_name=dyn_array_name)
                            lines.append(line)
                            line = 'const int _num{k} = {dyn_array_name}.size();'
                            line = line.format(k=k,
                                               dyn_array_name=dyn_array_name)
                            lines.append(line)
                    else:
                        lines.append('const int _num%s = %s;' % (k, v.size))
                except TypeError:
                    pass
        for line in lines:
            # Sometimes an array is referred to by two different keys in our
            # dictionary -- make sure to never add a line twice
            if not line in code_object_defs[codeobj.name]:
                code_object_defs[codeobj.name].append(line)
    # Generate the code objects
    for codeobj in self.code_objects.itervalues():
        ns = codeobj.variables
        # TODO: fix these freeze/CONSTANTS hacks somehow - they work but not elegant.
        code = freeze(codeobj.code.cpp_file, ns)
        code = code.replace('%CONSTANTS%',
                            '\n'.join(code_object_defs[codeobj.name]))
        code = '#include "objects.h"\n' + code
        writer.write('code_objects/' + codeobj.name + '.cpp', code)
        writer.write('code_objects/' + codeobj.name + '.h',
                     codeobj.code.h_file)
    # The code_objects are passed in the right order to run them because they were
    # sorted by the Network object. To support multiple clocks we'll need to be
    # smarter about that.
    main_tmp = CPPStandaloneCodeObject.templater.main(
        None, None,
        main_lines=main_lines,
        code_objects=self.code_objects.values(),
        report_func=self.report_func,
        dt=float(defaultclock.dt),
        additional_headers=main_includes)
    writer.write('main.cpp', main_tmp)
    # Generate the run functions
    run_tmp = CPPStandaloneCodeObject.templater.run(
        None, None,
        run_funcs=runfuncs,
        code_objects=self.code_objects.values(),
        additional_headers=run_includes)
    writer.write('run.*', run_tmp)
    # Copy the brianlib directory
    brianlib_dir = os.path.join(
        os.path.split(inspect.getsourcefile(CPPStandaloneCodeObject))[0],
        'brianlib')
    brianlib_files = copy_directory(brianlib_dir,
                                    os.path.join(project_dir, 'brianlib'))
    for file in brianlib_files:
        if file.lower().endswith('.cpp'):
            writer.source_files.append('brianlib/' + file)
        elif file.lower().endswith('.h'):
            writer.header_files.append('brianlib/' + file)
    # Copy the CSpikeQueue implementation
    spikequeue_h = os.path.join(project_dir, 'brianlib', 'spikequeue.h')
    shutil.copy2(
        os.path.join(os.path.split(inspect.getsourcefile(Synapses))[0],
                     'cspikequeue.cpp'),
        spikequeue_h)
    #writer.header_files.append(spikequeue_h)
    writer.source_files.extend(additional_source_files)
    writer.header_files.extend(additional_header_files)
    # Generate the makefile
    if os.name == 'nt':
        rm_cmd = 'del'
    else:
        rm_cmd = 'rm'
    makefile_tmp = CPPStandaloneCodeObject.templater.makefile(
        None, None,
        source_files=' '.join(writer.source_files),
        header_files=' '.join(writer.header_files),
        rm_cmd=rm_cmd)
    writer.write('makefile', makefile_tmp)
    # build the project
    if compile_project:
        with in_directory(project_dir):
            # debug takes precedence over native when both are requested.
            if debug:
                x = os.system('make debug')
            elif native:
                x = os.system('make native')
            else:
                x = os.system('make')
            if x == 0:
                if run_project:
                    if not with_output:
                        stdout = open(os.devnull, 'w')
                    else:
                        stdout = None
                    if os.name == 'nt':
                        x = subprocess.call(['main'] + run_args,
                                            stdout=stdout)
                    else:
                        x = subprocess.call(['./main'] + run_args,
                                            stdout=stdout)
                    if x:
                        raise RuntimeError("Project run failed")
                self.has_been_run = True
            else:
                raise RuntimeError("Project compilation failed")
def determine_keywords(self):
    """Assemble the template keyword dictionary for code generation:
    restricted-pointer declarations, user-function support code, hash
    defines, denormal handling and extra kernel lines.
    """
    # set up the restricted pointers, these are used so that the compiler
    # knows there is no aliasing in the pointers, for optimisation
    pointers = []
    # Add additional lines inside the kernel functions
    kernel_lines = []
    # It is possible that several different variable names refer to the
    # same array. E.g. in gapjunction code, v_pre and v_post refer to the
    # same array if a group is connected to itself
    handled_pointers = set()
    template_kwds = {}
    # Again, do the import here to avoid a circular dependency.
    from brian2.devices.device import get_device
    device = get_device()
    for varname, var in self.variables.iteritems():
        if isinstance(var, ArrayVariable):
            # This is the "true" array name, not the restricted pointer.
            array_name = device.get_array_name(var)
            pointer_name = self.get_array_name(var)
            if pointer_name in handled_pointers:
                continue
            if getattr(var, 'ndim', 1) > 1:
                continue  # multidimensional (dynamic) arrays have to be treated differently
            restrict = self.restrict
            # turn off restricted pointers for scalars for safety
            if var.scalar:
                restrict = ' '
            line = '{0}* {1} {2} = {3};'.format(self.c_data_type(var.dtype),
                                                restrict, pointer_name,
                                                array_name)
            pointers.append(line)
            handled_pointers.add(pointer_name)
    # set up the functions
    user_functions = []
    support_code = []
    hash_defines = []
    for varname, variable in self.variables.items():
        if isinstance(variable, Function):
            hd, ps, sc, uf, kl = self._add_user_function(varname, variable)
            user_functions.extend(uf)
            support_code.extend(sc)
            pointers.extend(ps)
            hash_defines.extend(hd)
            kernel_lines.extend(kl)
    support_code.append(self.universal_support_code)
    # Clock variables (t, dt, timestep) are passed by value to kernels and
    # need to be translated back into pointers for scalar/vector code.
    for varname, variable in self.variables.iteritems():
        if hasattr(variable, 'owner') and isinstance(variable.owner, Clock):
            # get arrayname without _ptr suffix (e.g. _array_defaultclock_dt)
            arrayname = self.get_array_name(variable, pointer=False)
            line = "const {dtype}* _ptr{arrayname} = &_value{arrayname};"
            line = line.format(dtype=c_data_type(variable.dtype),
                               arrayname=arrayname)
            # Deduplicate: several variables may share one clock array.
            if line not in kernel_lines:
                kernel_lines.append(line)
    keywords = {'pointers_lines': stripped_deindented_lines('\n'.join(pointers)),
                'support_code_lines': stripped_deindented_lines('\n'.join(support_code)),
                'hashdefine_lines': stripped_deindented_lines('\n'.join(hash_defines)),
                'denormals_code_lines': stripped_deindented_lines('\n'.join(self.denormals_to_zero_code())),
                'kernel_lines': stripped_deindented_lines('\n'.join(kernel_lines)),
                'uses_atomics': self.uses_atomics}
    keywords.update(template_kwds)
    return keywords
def generate_codeobj_source(self, writer):
    """Write one ``.cu``/``.h`` pair per code object to ``code_objects/``.

    Rewrites ``_rand``/``_randn`` calls so each call site draws from its
    own slice of a pre-generated random-number buffer, collects per-code-
    object constant definitions and kernel parameter lists, then fills the
    ``%CONSTANTS%``/``%HOST_PARAMETERS%``/``%DEVICE_PARAMETERS%``/
    ``%KERNEL_VARIABLES%``/``%CODEOBJ_NAME%`` placeholders and writes the
    result via ``writer``.

    Parameters
    ----------
    writer : object
        Must provide ``write(filename, contents)``.
    """
    # check how many random numbers are needed per step
    for code_object in self.code_objects.itervalues():
        num_occurences_rand = code_object.code.cu_file.count("_rand(")
        num_occurences_randn = code_object.code.cu_file.count("_randn(")
        if num_occurences_rand > 0:
            # synapses_create_generator uses host side random number generation
            if code_object.template_name != "synapses_create_generator":
                # first one is alway the definition, so subtract 1
                code_object.rand_calls = num_occurences_rand - 1
                # Give every _rand call site a distinct offset into the
                # random-number buffer (replace one occurrence at a time).
                for i in range(0, code_object.rand_calls):
                    code_object.code.cu_file = code_object.code.cu_file.replace(
                        "_rand(_vectorisation_idx)",
                        "_rand(_vectorisation_idx + {i} * _N)".format(i=i),
                        1)
        if num_occurences_randn > 0 and code_object.template_name != "synapses_create_generator":
            # first one is alway the definition, so subtract 1
            code_object.randn_calls = num_occurences_randn - 1
            for i in range(0, code_object.randn_calls):
                code_object.code.cu_file = code_object.code.cu_file.replace(
                    "_randn(_vectorisation_idx)",
                    "_randn(_vectorisation_idx + {i} * _N)".format(i=i),
                    1)

    # Per-code-object accumulators, keyed by code object name.
    code_object_defs = defaultdict(list)
    host_parameters = defaultdict(list)
    device_parameters = defaultdict(list)
    kernel_variables = defaultdict(list)
    # Generate data for non-constant values
    for codeobj in self.code_objects.itervalues():
        code_object_defs_lines = []
        host_parameters_lines = []
        device_parameters_lines = []
        kernel_variables_lines = []
        additional_code = []
        number_elements = ""
        # Use the owner's static size when known and non-zero, otherwise
        # fall back to the runtime _N symbol.
        if hasattr(codeobj, 'owner') and hasattr(codeobj.owner, '_N') and codeobj.owner._N != 0:
            number_elements = str(codeobj.owner._N)
        else:
            number_elements = "_N"
        for k, v in codeobj.variables.iteritems():
            # code objects which only run once: generate their random
            # numbers on the device up front instead of per tick
            if k == "_python_randn" and codeobj.runs_every_tick == False and codeobj.template_name != "synapses_create_generator":
                additional_code.append('''
                    //genenerate an array of random numbers on the device
                    float* dev_array_randn;
                    cudaMalloc((void**)&dev_array_randn, sizeof(float)*''' + number_elements + ''' * ''' + str(codeobj.randn_calls) + ''');
                    if(!dev_array_randn)
                    {
                        printf("ERROR while allocating device memory with size %ld\\n", sizeof(float)*''' + number_elements + '''*''' + str(codeobj.randn_calls) + ''');
                    }
                    curandGenerateNormal(random_float_generator, dev_array_randn, ''' + number_elements + '''*''' + str(codeobj.randn_calls) + ''', 0, 1);''')
                line = "float* _array_{name}_randn".format(name=codeobj.name)
                device_parameters_lines.append(line)
                host_parameters_lines.append("dev_array_randn")
            elif k == "_python_rand" and codeobj.runs_every_tick == False and codeobj.template_name != "synapses_create_generator":
                additional_code.append('''
                    //genenerate an array of random numbers on the device
                    float* dev_array_rand;
                    cudaMalloc((void**)&dev_array_rand, sizeof(float)*''' + number_elements + '''*''' + str(codeobj.rand_calls) + ''');
                    if(!dev_array_rand)
                    {
                        printf("ERROR while allocating device memory with size %ld\\n", sizeof(float)*''' + number_elements + '''*''' + str(codeobj.rand_calls) + ''');
                    }
                    curandGenerateUniform(random_float_generator, dev_array_rand, ''' + number_elements + '''*''' + str(codeobj.rand_calls) + ''');''')
                line = "float* _array_{name}_rand".format(name=codeobj.name)
                device_parameters_lines.append(line)
                host_parameters_lines.append("dev_array_rand")
            elif isinstance(v, ArrayVariable):
                # v.size may raise TypeError for variables whose size is
                # not yet known -- those are skipped below.
                try:
                    if isinstance(v, DynamicArrayVariable):
                        if v.dimensions == 1:
                            dyn_array_name = self.dynamic_arrays[v]
                            array_name = self.arrays[v]
                            line = '{c_type}* const {array_name} = thrust::raw_pointer_cast(&dev{dyn_array_name}[0]);'
                            line = line.format(c_type=c_data_type(v.dtype),
                                               array_name=array_name,
                                               dyn_array_name=dyn_array_name)
                            code_object_defs_lines.append(line)
                            line = 'const int _num{k} = dev{dyn_array_name}.size();'
                            line = line.format(k=k, dyn_array_name=dyn_array_name)
                            code_object_defs_lines.append(line)
                            host_parameters_lines.append(array_name)
                            host_parameters_lines.append("_num" + k)
                            line = "{c_type}* par_{array_name}"
                            device_parameters_lines.append(
                                line.format(c_type=c_data_type(v.dtype),
                                            array_name=array_name))
                            line = "int par_num_{array_name}"
                            device_parameters_lines.append(line.format(array_name=k))
                            line = "{c_type}* _ptr{array_name} = par_{array_name};"
                            kernel_variables_lines.append(
                                line.format(c_type=c_data_type(v.dtype),
                                            array_name=array_name))
                            line = "const int _num{array_name} = par_num_{array_name};"
                            kernel_variables_lines.append(line.format(array_name=k))
                    else:
                        arrayname = self.get_array_name(v)
                        host_parameters_lines.append("dev" + arrayname)
                        device_parameters_lines.append(
                            "%s* par_%s" % (c_data_type(v.dtype), arrayname))
                        kernel_variables_lines.append(
                            "%s* _ptr%s = par_%s;" % (c_data_type(v.dtype), arrayname, arrayname))
                        code_object_defs_lines.append(
                            'const int _num%s = %s;' % (k, v.size))
                        kernel_variables_lines.append(
                            'const int _num%s = %s;' % (k, v.size))
                        # NOTE(review): appends an index suffix to the last
                        # host parameter (the dev<arrayname> entry) for
                        # '...space' variables -- presumably eventspace
                        # buffers indexed by a current-slot counter; confirm.
                        if k.endswith('space'):
                            host_parameters_lines[-1] += '[current_idx{arrayname}]'.format(arrayname=arrayname)
                except TypeError:
                    pass
        # Sometimes an array is referred to by to different keys in our
        # dictionary -- make sure to never add a line twice
        for line in code_object_defs_lines:
            if not line in code_object_defs[codeobj.name]:
                code_object_defs[codeobj.name].append(line)
        for line in host_parameters_lines:
            if not line in host_parameters[codeobj.name]:
                host_parameters[codeobj.name].append(line)
        for line in device_parameters_lines:
            if not line in device_parameters[codeobj.name]:
                device_parameters[codeobj.name].append(line)
        for line in kernel_variables_lines:
            if not line in kernel_variables[codeobj.name]:
                kernel_variables[codeobj.name].append(line)
        for line in additional_code:
            if not line in code_object_defs[codeobj.name]:
                code_object_defs[codeobj.name].append(line)
    # Generate the code objects
    for codeobj in self.code_objects.itervalues():
        ns = codeobj.variables
        # TODO: fix these freeze/CONSTANTS hacks somehow - they work but not elegant.
        code = self.freeze(codeobj.code.cu_file, ns)
        # Kernels need at least one parameter -- pad with a dummy if empty.
        if len(host_parameters[codeobj.name]) == 0:
            host_parameters[codeobj.name].append("0")
            device_parameters[codeobj.name].append("int dummy")
        code = code.replace('%CONSTANTS%', '\n\t\t'.join(code_object_defs[codeobj.name]))
        code = code.replace('%HOST_PARAMETERS%', ',\n\t\t\t'.join(host_parameters[codeobj.name]))
        code = code.replace('%DEVICE_PARAMETERS%', ',\n\t'.join(device_parameters[codeobj.name]))
        code = code.replace('%KERNEL_VARIABLES%', '\n\t'.join(kernel_variables[codeobj.name]))
        code = code.replace('%CODEOBJ_NAME%', codeobj.name)
        code = '#include "objects.h"\n' + code
        writer.write('code_objects/' + codeobj.name + '.cu', code)
        writer.write('code_objects/' + codeobj.name + '.h', codeobj.code.h_file)
def determine_keywords(self):
    """Collect the template keyword substitutions for this code object.

    CUDA variant: builds restricted-pointer declarations, user-function
    support code, hash defines and extra kernel lines from
    ``self.variables`` and returns them as a dict of newline-joined,
    deindented line blocks, merged with any template-specific keywords.

    Returns
    -------
    dict
        Keys: ``pointers_lines``, ``support_code_lines``,
        ``hashdefine_lines``, ``denormals_code_lines``, ``kernel_lines``,
        ``uses_atomics`` (plus anything in ``template_kwds``).
    """
    # set up the restricted pointers, these are used so that the compiler
    # knows there is no aliasing in the pointers, for optimisation
    pointers = []
    # Add additional lines inside the kernel functions
    kernel_lines = []
    # It is possible that several different variable names refer to the
    # same array. E.g. in gapjunction code, v_pre and v_post refer to the
    # same array if a group is connected to itself
    handled_pointers = set()
    template_kwds = {}
    # Again, do the import here to avoid a circular dependency.
    from brian2.devices.device import get_device
    device = get_device()
    for varname, var in self.variables.items():
        if isinstance(var, ArrayVariable):
            # This is the "true" array name, not the restricted pointer.
            array_name = device.get_array_name(var)
            pointer_name = self.get_array_name(var)
            if pointer_name in handled_pointers:
                continue
            if getattr(var, 'ndim', 1) > 1:
                continue  # multidimensional (dynamic) arrays have to be treated differently
            restrict = self.restrict
            # turn off restricted pointers for scalars for safety
            if var.scalar:
                restrict = ' '
            # Need to use correct dt type in pointers_lines for single precision,
            # see #148
            if varname == "dt" and prefs.core.default_float_dtype == np.float32:
                # c_data_type(variable.dtype) is float, but we need double
                dtype = "double"
            else:
                dtype = self.c_data_type(var.dtype)
            line = '{0}* {1} {2} = {3};'.format(dtype, restrict, pointer_name,
                                                array_name)
            pointers.append(line)
            handled_pointers.add(pointer_name)
    # set up the functions
    user_functions = []
    support_code = []
    hash_defines = []
    added = set()  # keep track of functions that were added
    for varname, variable in list(self.variables.items()):
        if isinstance(variable, Function):
            user_func = self._add_user_function(varname, variable, added)
            if user_func is not None:
                hd, ps, sc, uf, kl = user_func
                user_functions.extend(uf)
                support_code.extend(sc)
                pointers.extend(ps)
                hash_defines.extend(hd)
                kernel_lines.extend(kl)
    # Generate universal_support_code once when the first codeobject is created.
    # Can't do it at import time since need access to user preferences
    # This is a class attribute (not instance attribute).
    if CUDACodeGenerator.universal_support_code is None:
        _atomic_support_code = _generate_atomic_support_code()
        CUDACodeGenerator.universal_support_code = (
            _hightype_support_code
            + _mod_support_code
            + _floordiv_support_code
            + _pow_support_code
            + _atomic_support_code
        )
    support_code.append(CUDACodeGenerator.universal_support_code)
    # Clock variables (t, dt, timestep) are passed by value to kernels and
    # need to be translated back into pointers for scalar/vector code.
    for varname, variable in self.variables.items():
        if hasattr(variable, 'owner') and isinstance(variable.owner, Clock):
            # get arrayname without _ptr suffix (e.g. _array_defaultclock_dt)
            arrayname = self.get_array_name(variable, prefix='')
            # kernel_lines appear before dt is cast to float (in scalar_code), hence
            # we need to still use double (used in kernel parameters), see #148
            if varname == "dt" and prefs.core.default_float_dtype == np.float32:
                # c_data_type(variable.dtype) is float, but we need double
                dtype = "double"
            else:
                # FIX: was the redundant chained assignment
                # `dtype = dtype=c_data_type(...)` (a typo assigning the
                # same value twice); a single assignment suffices.
                dtype = c_data_type(variable.dtype)
            line = f"const {dtype}* _ptr{arrayname} = &_value{arrayname};"
            if line not in kernel_lines:
                kernel_lines.append(line)
    keywords = {'pointers_lines': stripped_deindented_lines('\n'.join(pointers)),
                'support_code_lines': stripped_deindented_lines('\n'.join(support_code)),
                'hashdefine_lines': stripped_deindented_lines('\n'.join(hash_defines)),
                'denormals_code_lines': stripped_deindented_lines('\n'.join(self.denormals_to_zero_code())),
                'kernel_lines': stripped_deindented_lines('\n'.join(kernel_lines)),
                'uses_atomics': self.uses_atomics
                }
    keywords.update(template_kwds)
    return keywords
def build(self, project_dir='output', compile_project=True, run_project=False,
          debug=True, with_output=True, native=True,
          additional_source_files=None, additional_header_files=None,
          main_includes=None, run_includes=None,
          run_args=None,
          ):
    '''
    Build the project

    TODO: more details

    Parameters
    ----------
    project_dir : str
        The output directory to write the project to, any existing files
        will be overwritten.
    compile_project : bool
        Whether or not to attempt to compile the project using GNU make.
    run_project : bool
        Whether or not to attempt to run the built project if it
        successfully builds.
    debug : bool
        Whether to compile in debug mode.
    with_output : bool
        Whether or not to show the ``stdout`` of the built program when run.
    native : bool
        Whether or not to compile natively using the ``--march=native``
        gcc option.
    additional_source_files : list of str
        A list of additional ``.cpp`` files to include in the build.
    additional_header_files : list of str
        A list of additional ``.h`` files to include in the build.
    main_includes : list of str
        A list of additional header files to include in ``main.cpp``.
    run_includes : list of str
        A list of additional header files to include in ``run.cpp``.
    '''
    # Mutable defaults are created per call here, not in the signature.
    if additional_source_files is None:
        additional_source_files = []
    if additional_header_files is None:
        additional_header_files = []
    if main_includes is None:
        main_includes = []
    if run_includes is None:
        run_includes = []
    if run_args is None:
        run_args = []
    self.project_dir = project_dir
    ensure_directory(project_dir)
    for d in ['code_objects', 'results', 'static_arrays']:
        ensure_directory(os.path.join(project_dir, d))

    writer = CPPWriter(project_dir)

    logger.debug("Writing C++ standalone project to directory "+os.path.normpath(project_dir))

    # NOTE: Python 2 only -- tuple-parameter lambda syntax.
    arange_arrays = sorted([(var, start)
                            for var, start in
                            self.arange_arrays.iteritems()],
                           key=lambda (var, start): var.name)

    # # Find np arrays in the namespaces and convert them into static
    # # arrays. Hopefully they are correctly used in the code: For example,
    # # this works for the namespaces for functions with C++ (e.g. TimedArray
    # # treats it as a C array) but does not work in places that are
    # # implicitly vectorized (state updaters, resets, etc.). But arrays
    # # shouldn't be used there anyway.
    for code_object in self.code_objects.itervalues():
        for name, value in code_object.variables.iteritems():
            if isinstance(value, np.ndarray):
                self.static_arrays[name] = value

    # write the static arrays
    logger.debug("static arrays: "+str(sorted(self.static_arrays.keys())))
    static_array_specs = []
    for name, arr in sorted(self.static_arrays.items()):
        arr.tofile(os.path.join(project_dir, 'static_arrays', name))
        static_array_specs.append((name, c_data_type(arr.dtype), arr.size, name))

    # Write the global objects
    networks = [net() for net in Network.__instances__() if net().name!='_fake_network']
    synapses = [S() for S in Synapses.__instances__()]
    arr_tmp = CPPStandaloneCodeObject.templater.objects(
                    None, None,
                    array_specs=self.arrays,
                    dynamic_array_specs=self.dynamic_arrays,
                    dynamic_array_2d_specs=self.dynamic_arrays_2d,
                    zero_arrays=self.zero_arrays,
                    arange_arrays=arange_arrays,
                    synapses=synapses,
                    clocks=self.clocks,
                    static_array_specs=static_array_specs,
                    networks=networks,
                    )
    writer.write('objects.*', arr_tmp)

    # Translate the queued main-loop actions into C++ main() lines.
    # `procedures` is a stack: start_run_func/end_run_func push/pop the
    # list that subsequent lines are appended to.
    main_lines = []
    procedures = [('', main_lines)]
    runfuncs = {}
    for func, args in self.main_queue:
        if func=='run_code_object':
            codeobj, = args
            main_lines.append('_run_%s();' % codeobj.name)
        elif func=='run_network':
            net, netcode = args
            main_lines.extend(netcode)
        elif func=='set_by_array':
            arrayname, staticarrayname = args
            code = '''
            for(int i=0; i<_num_{staticarrayname}; i++)
            {{
                {arrayname}[i] = {staticarrayname}[i];
            }}
            '''.format(arrayname=arrayname, staticarrayname=staticarrayname)
            main_lines.extend(code.split('\n'))
        elif func=='set_array_by_array':
            arrayname, staticarrayname_index, staticarrayname_value = args
            code = '''
            for(int i=0; i<_num_{staticarrayname_index}; i++)
            {{
                {arrayname}[{staticarrayname_index}[i]] = {staticarrayname_value}[i];
            }}
            '''.format(arrayname=arrayname, staticarrayname_index=staticarrayname_index,
                       staticarrayname_value=staticarrayname_value)
            main_lines.extend(code.split('\n'))
        elif func=='insert_code':
            main_lines.append(args)
        elif func=='start_run_func':
            name, include_in_parent = args
            if include_in_parent:
                main_lines.append('%s();' % name)
            main_lines = []
            procedures.append((name, main_lines))
        elif func=='end_run_func':
            name, include_in_parent = args
            name, main_lines = procedures.pop(-1)
            runfuncs[name] = main_lines
            name, main_lines = procedures[-1]
        else:
            raise NotImplementedError("Unknown main queue function type "+func)

    # generate the finalisations
    for codeobj in self.code_objects.itervalues():
        if hasattr(codeobj.code, 'main_finalise'):
            main_lines.append(codeobj.code.main_finalise)

    # Generate data for non-constant values
    code_object_defs = defaultdict(list)
    for codeobj in self.code_objects.itervalues():
        lines = []
        for k, v in codeobj.variables.iteritems():
            if isinstance(v, AttributeVariable):
                # We assume all attributes are implemented as property-like methods
                line = 'const {c_type} {varname} = {objname}.{attrname}();'
                lines.append(line.format(c_type=c_data_type(v.dtype), varname=k,
                                         objname=v.obj.name,
                                         attrname=v.attribute))
            elif isinstance(v, ArrayVariable):
                # v.size may raise TypeError for variables without a known
                # size yet -- those are skipped.
                try:
                    if isinstance(v, DynamicArrayVariable):
                        if v.dimensions == 1:
                            dyn_array_name = self.dynamic_arrays[v]
                            array_name = self.arrays[v]
                            line = '{c_type}* const {array_name} = &{dyn_array_name}[0];'
                            line = line.format(c_type=c_data_type(v.dtype),
                                               array_name=array_name,
                                               dyn_array_name=dyn_array_name)
                            lines.append(line)
                            line = 'const int _num{k} = {dyn_array_name}.size();'
                            line = line.format(k=k, dyn_array_name=dyn_array_name)
                            lines.append(line)
                    else:
                        lines.append('const int _num%s = %s;' % (k, v.size))
                except TypeError:
                    pass
        for line in lines:
            # Sometimes an array is referred to by to different keys in our
            # dictionary -- make sure to never add a line twice
            if not line in code_object_defs[codeobj.name]:
                code_object_defs[codeobj.name].append(line)

    # Generate the code objects
    for codeobj in self.code_objects.itervalues():
        ns = codeobj.variables
        # TODO: fix these freeze/CONSTANTS hacks somehow - they work but not elegant.
        code = freeze(codeobj.code.cpp_file, ns)
        code = code.replace('%CONSTANTS%', '\n'.join(code_object_defs[codeobj.name]))
        code = '#include "objects.h"\n'+code
        writer.write('code_objects/'+codeobj.name+'.cpp', code)
        writer.write('code_objects/'+codeobj.name+'.h', codeobj.code.h_file)

    # The code_objects are passed in the right order to run them because they were
    # sorted by the Network object. To support multiple clocks we'll need to be
    # smarter about that.
    main_tmp = CPPStandaloneCodeObject.templater.main(None, None,
                                                      main_lines=main_lines,
                                                      code_objects=self.code_objects.values(),
                                                      report_func=self.report_func,
                                                      dt=float(defaultclock.dt),
                                                      additional_headers=main_includes,
                                                      )
    writer.write('main.cpp', main_tmp)

    # Generate the run functions
    run_tmp = CPPStandaloneCodeObject.templater.run(None, None, run_funcs=runfuncs,
                                                    code_objects=self.code_objects.values(),
                                                    additional_headers=run_includes,
                                                    )
    writer.write('run.*', run_tmp)

    # Copy the brianlibdirectory
    brianlib_dir = os.path.join(os.path.split(inspect.getsourcefile(CPPStandaloneCodeObject))[0],
                                'brianlib')
    brianlib_files = copy_directory(brianlib_dir, os.path.join(project_dir, 'brianlib'))
    for file in brianlib_files:
        if file.lower().endswith('.cpp'):
            writer.source_files.append('brianlib/'+file)
        elif file.lower().endswith('.h'):
            writer.header_files.append('brianlib/'+file)

    # Copy the CSpikeQueue implementation
    # (the .cpp source doubles as a header-only include on the C++ side)
    spikequeue_h = os.path.join(project_dir, 'brianlib', 'spikequeue.h')
    shutil.copy2(os.path.join(os.path.split(inspect.getsourcefile(Synapses))[0], 'cspikequeue.cpp'),
                 spikequeue_h)
    #writer.header_files.append(spikequeue_h)

    writer.source_files.extend(additional_source_files)
    writer.header_files.extend(additional_header_files)

    # Generate the makefile
    if os.name=='nt':
        rm_cmd = 'del'
    else:
        rm_cmd = 'rm'
    makefile_tmp = CPPStandaloneCodeObject.templater.makefile(None, None,
        source_files=' '.join(writer.source_files),
        header_files=' '.join(writer.header_files),
        rm_cmd=rm_cmd)
    writer.write('makefile', makefile_tmp)

    # build the project
    if compile_project:
        with in_directory(project_dir):
            if debug:
                x = os.system('make debug')
            elif native:
                x = os.system('make native')
            else:
                x = os.system('make')
            if x==0:
                if run_project:
                    if not with_output:
                        stdout = open(os.devnull, 'w')
                    else:
                        stdout = None
                    if os.name=='nt':
                        x = subprocess.call(['main'] + run_args, stdout=stdout)
                    else:
                        x = subprocess.call(['./main'] + run_args, stdout=stdout)
                    if x:
                        raise RuntimeError("Project run failed")
                    self.has_been_run = True
            else:
                raise RuntimeError("Project compilation failed")
def build(self, directory='output', compile=True, run=True, debug=False,
          clean=True, with_output=True, native=True,
          additional_source_files=None, additional_header_files=None,
          main_includes=None, run_includes=None,
          run_args=None, **kwds):
    '''
    Build the project

    TODO: more details

    Parameters
    ----------
    directory : str
        The output directory to write the project to, any existing files
        will be overwritten.
    compile : bool
        Whether or not to attempt to compile the project
    run : bool
        Whether or not to attempt to run the built project if it
        successfully builds.
    debug : bool
        Whether to compile in debug mode.
    with_output : bool
        Whether or not to show the ``stdout`` of the built program when run.
    native : bool
        Whether or not to compile for the current machine's architecture
        (best for speed, but not portable)
    clean : bool
        Whether or not to clean the project before building
    additional_source_files : list of str
        A list of additional ``.cpp`` files to include in the build.
    additional_header_files : list of str
        A list of additional ``.h`` files to include in the build.
    main_includes : list of str
        A list of additional header files to include in ``main.cpp``.
    run_includes : list of str
        A list of additional header files to include in ``run.cpp``.
    '''
    # Reject unknown keyword arguments, giving a helpful message for
    # arguments that were renamed from an earlier API version.
    renames = {'project_dir': 'directory',
               'compile_project': 'compile',
               'run_project': 'run'}
    if len(kwds):
        msg = ''
        for kwd in kwds:
            if kwd in renames:
                msg += ("Keyword argument '%s' has been renamed to "
                        "'%s'. ") % (kwd, renames[kwd])
            else:
                msg += "Unknown keyword argument '%s'. " % kwd
        raise TypeError(msg)

    # Mutable defaults are created per call here, not in the signature.
    if additional_source_files is None:
        additional_source_files = []
    if additional_header_files is None:
        additional_header_files = []
    if main_includes is None:
        main_includes = []
    if run_includes is None:
        run_includes = []
    if run_args is None:
        run_args = []
    self.project_dir = directory
    ensure_directory(directory)

    compiler, extra_compile_args = get_compiler_and_args()
    compiler_flags = ' '.join(extra_compile_args)

    for d in ['code_objects', 'results', 'static_arrays']:
        ensure_directory(os.path.join(directory, d))

    writer = CPPWriter(directory)

    # Get the number of threads if specified in an openmp context
    nb_threads = prefs.devices.cpp_standalone.openmp_threads
    # If the number is negative, we need to throw an error
    if (nb_threads < 0):
        raise ValueError('The number of OpenMP threads can not be negative !')

    logger.debug("Writing C++ standalone project to directory "+os.path.normpath(directory))
    if nb_threads > 0:
        logger.warn("OpenMP code is not yet well tested, and may be inaccurate.",
                    "openmp", once=True)
        logger.debug("Using OpenMP with %d threads " % nb_threads)
        # All templates in use must declare OpenMP compatibility.
        for codeobj in self.code_objects.itervalues():
            if not 'IS_OPENMP_COMPATIBLE' in codeobj.template_source:
                raise RuntimeError(("Code object '%s' uses the template %s "
                                    "which is not compatible with "
                                    "OpenMP.") % (codeobj.name,
                                                  codeobj.template_name))

    # NOTE: Python 2 only -- tuple-parameter lambda syntax.
    arange_arrays = sorted([(var, start)
                            for var, start in
                            self.arange_arrays.iteritems()],
                           key=lambda (var, start): var.name)

    # # Find np arrays in the namespaces and convert them into static
    # # arrays. Hopefully they are correctly used in the code: For example,
    # # this works for the namespaces for functions with C++ (e.g. TimedArray
    # # treats it as a C array) but does not work in places that are
    # # implicitly vectorized (state updaters, resets, etc.). But arrays
    # # shouldn't be used there anyway.
    for code_object in self.code_objects.itervalues():
        for name, value in code_object.variables.iteritems():
            if isinstance(value, np.ndarray):
                self.static_arrays[name] = value

    # write the static arrays
    logger.debug("static arrays: "+str(sorted(self.static_arrays.keys())))
    static_array_specs = []
    for name, arr in sorted(self.static_arrays.items()):
        arr.tofile(os.path.join(directory, 'static_arrays', name))
        static_array_specs.append((name, c_data_type(arr.dtype), arr.size, name))

    # Write the global objects
    networks = [net() for net in Network.__instances__()
                if net().name != '_fake_network']
    synapses = []
    for net in networks:
        net_synapses = [s for s in net.objects if isinstance(s, Synapses)]
        synapses.extend(net_synapses)
        # We don't currently support pathways with scalar delays
        for synapse_obj in net_synapses:
            for pathway in synapse_obj._pathways:
                if not isinstance(pathway.variables['delay'],
                                  DynamicArrayVariable):
                    error_msg = ('The "%s" pathway uses a scalar '
                                 'delay (instead of a delay per synapse). '
                                 'This is not yet supported. Do not '
                                 'specify a delay in the Synapses(...) '
                                 'call but instead set its delay attribute '
                                 'afterwards.') % (pathway.name)
                    raise NotImplementedError(error_msg)

    # Not sure what the best place is to call Network.after_run -- at the
    # moment the only important thing it does is to clear the objects stored
    # in magic_network. If this is not done, this might lead to problems
    # for repeated runs of standalone (e.g. in the test suite).
    for net in networks:
        net.after_run()

    arr_tmp = CPPStandaloneCodeObject.templater.objects(
                    None, None,
                    array_specs=self.arrays,
                    dynamic_array_specs=self.dynamic_arrays,
                    dynamic_array_2d_specs=self.dynamic_arrays_2d,
                    zero_arrays=self.zero_arrays,
                    arange_arrays=arange_arrays,
                    synapses=synapses,
                    clocks=self.clocks,
                    static_array_specs=static_array_specs,
                    networks=networks)
    writer.write('objects.*', arr_tmp)

    # Translate the queued main-loop actions into C++ main() lines.
    # `procedures` is a stack: start_run_func/end_run_func push/pop the
    # list that subsequent lines are appended to.
    main_lines = []
    procedures = [('', main_lines)]
    runfuncs = {}
    for func, args in self.main_queue:
        if func=='run_code_object':
            codeobj, = args
            main_lines.append('_run_%s();' % codeobj.name)
        elif func=='run_network':
            net, netcode = args
            main_lines.extend(netcode)
        elif func=='set_by_array':
            arrayname, staticarrayname = args
            code = '''
            {pragma}
            for(int i=0; i<_num_{staticarrayname}; i++)
            {{
                {arrayname}[i] = {staticarrayname}[i];
            }}
            '''.format(arrayname=arrayname, staticarrayname=staticarrayname,
                       pragma=openmp_pragma('static'))
            main_lines.extend(code.split('\n'))
        elif func=='set_by_single_value':
            arrayname, item, value = args
            code = '{arrayname}[{item}] = {value};'.format(arrayname=arrayname,
                                                           item=item,
                                                           value=value)
            main_lines.extend([code])
        elif func=='set_array_by_array':
            arrayname, staticarrayname_index, staticarrayname_value = args
            code = '''
            {pragma}
            for(int i=0; i<_num_{staticarrayname_index}; i++)
            {{
                {arrayname}[{staticarrayname_index}[i]] = {staticarrayname_value}[i];
            }}
            '''.format(arrayname=arrayname, staticarrayname_index=staticarrayname_index,
                       staticarrayname_value=staticarrayname_value,
                       pragma=openmp_pragma('static'))
            main_lines.extend(code.split('\n'))
        elif func=='insert_code':
            main_lines.append(args)
        elif func=='start_run_func':
            name, include_in_parent = args
            if include_in_parent:
                main_lines.append('%s();' % name)
            main_lines = []
            procedures.append((name, main_lines))
        elif func=='end_run_func':
            name, include_in_parent = args
            name, main_lines = procedures.pop(-1)
            runfuncs[name] = main_lines
            name, main_lines = procedures[-1]
        else:
            raise NotImplementedError("Unknown main queue function type "+func)

    # generate the finalisations
    for codeobj in self.code_objects.itervalues():
        if hasattr(codeobj.code, 'main_finalise'):
            main_lines.append(codeobj.code.main_finalise)

    # Generate data for non-constant values
    code_object_defs = defaultdict(list)
    for codeobj in self.code_objects.itervalues():
        lines = []
        for k, v in codeobj.variables.iteritems():
            if isinstance(v, AttributeVariable):
                # We assume all attributes are implemented as property-like methods
                line = 'const {c_type} {varname} = {objname}.{attrname}();'
                lines.append(line.format(c_type=c_data_type(v.dtype), varname=k,
                                         objname=v.obj.name,
                                         attrname=v.attribute))
            elif isinstance(v, ArrayVariable):
                # v.size may raise TypeError for variables without a known
                # size yet -- those are skipped.
                try:
                    if isinstance(v, DynamicArrayVariable):
                        if v.dimensions == 1:
                            dyn_array_name = self.dynamic_arrays[v]
                            array_name = self.arrays[v]
                            line = '{c_type}* const {array_name} = &{dyn_array_name}[0];'
                            line = line.format(c_type=c_data_type(v.dtype),
                                               array_name=array_name,
                                               dyn_array_name=dyn_array_name)
                            lines.append(line)
                            line = 'const int _num{k} = {dyn_array_name}.size();'
                            line = line.format(k=k, dyn_array_name=dyn_array_name)
                            lines.append(line)
                    else:
                        lines.append('const int _num%s = %s;' % (k, v.size))
                except TypeError:
                    pass
        for line in lines:
            # Sometimes an array is referred to by to different keys in our
            # dictionary -- make sure to never add a line twice
            if not line in code_object_defs[codeobj.name]:
                code_object_defs[codeobj.name].append(line)

    # Generate the code objects
    for codeobj in self.code_objects.itervalues():
        ns = codeobj.variables
        # TODO: fix these freeze/CONSTANTS hacks somehow - they work but not elegant.
        code = freeze(codeobj.code.cpp_file, ns)
        code = code.replace('%CONSTANTS%', '\n'.join(code_object_defs[codeobj.name]))
        code = '#include "objects.h"\n'+code
        writer.write('code_objects/'+codeobj.name+'.cpp', code)
        writer.write('code_objects/'+codeobj.name+'.h', codeobj.code.h_file)

    # The code_objects are passed in the right order to run them because they were
    # sorted by the Network object. To support multiple clocks we'll need to be
    # smarter about that.
    main_tmp = CPPStandaloneCodeObject.templater.main(None, None,
                                                      main_lines=main_lines,
                                                      code_objects=self.code_objects.values(),
                                                      report_func=self.report_func,
                                                      dt=float(defaultclock.dt),
                                                      additional_headers=main_includes,
                                                      )
    writer.write('main.cpp', main_tmp)

    # MSVC needs an explicit std::move in the network template.
    if compiler=='msvc':
        std_move = 'std::move'
    else:
        std_move = ''
    network_tmp = CPPStandaloneCodeObject.templater.network(None, None,
                                                            std_move=std_move)
    writer.write('network.*', network_tmp)

    synapses_classes_tmp = CPPStandaloneCodeObject.templater.synapses_classes(None, None)
    writer.write('synapses_classes.*', synapses_classes_tmp)

    # Generate the run functions
    run_tmp = CPPStandaloneCodeObject.templater.run(None, None, run_funcs=runfuncs,
                                                    code_objects=self.code_objects.values(),
                                                    additional_headers=run_includes,
                                                    )
    writer.write('run.*', run_tmp)

    # Copy the brianlibdirectory
    brianlib_dir = os.path.join(os.path.split(inspect.getsourcefile(CPPStandaloneCodeObject))[0],
                                'brianlib')
    brianlib_files = copy_directory(brianlib_dir, os.path.join(directory, 'brianlib'))
    for file in brianlib_files:
        if file.lower().endswith('.cpp'):
            writer.source_files.append('brianlib/'+file)
        elif file.lower().endswith('.h'):
            writer.header_files.append('brianlib/'+file)

    # Copy the CSpikeQueue implementation
    # (the .cpp source doubles as a header-only include on the C++ side)
    shutil.copy2(os.path.join(os.path.split(inspect.getsourcefile(Synapses))[0],
                              'cspikequeue.cpp'),
                 os.path.join(directory, 'brianlib', 'spikequeue.h'))
    shutil.copy2(os.path.join(os.path.split(inspect.getsourcefile(Synapses))[0],
                              'stdint_compat.h'),
                 os.path.join(directory, 'brianlib', 'stdint_compat.h'))

    writer.source_files.extend(additional_source_files)
    writer.header_files.extend(additional_header_files)

    if compiler=='msvc':
        if native:
            arch_flag = ''
            # Best-effort detection of SSE support; missing py-cpuinfo
            # only disables the arch flag, it is not an error.
            try:
                from cpuinfo import cpuinfo
                res = cpuinfo.get_cpu_info()
                if 'sse' in res['flags']:
                    arch_flag = '/arch:SSE'
                if 'sse2' in res['flags']:
                    arch_flag = '/arch:SSE2'
            except ImportError:
                logger.warn('Native flag for MSVC compiler requires installation of the py-cpuinfo module')
            compiler_flags += ' '+arch_flag
        if nb_threads>1:
            openmp_flag = '/openmp'
        else:
            openmp_flag = ''
        # Generate the visual studio makefile
        source_bases = [fname.replace('.cpp', '').replace('/', '\\')
                        for fname in writer.source_files]
        win_makefile_tmp = CPPStandaloneCodeObject.templater.win_makefile(
            None, None,
            source_bases=source_bases,
            compiler_flags=compiler_flags,
            openmp_flag=openmp_flag,
            )
        writer.write('win_makefile', win_makefile_tmp)
    else:
        # Generate the makefile
        if os.name=='nt':
            rm_cmd = 'del *.o /s\n\tdel main.exe $(DEPS)'
        else:
            rm_cmd = 'rm $(OBJS) $(PROGRAM) $(DEPS)'
        makefile_tmp = CPPStandaloneCodeObject.templater.makefile(None, None,
            source_files=' '.join(writer.source_files),
            header_files=' '.join(writer.header_files),
            compiler_flags=compiler_flags,
            rm_cmd=rm_cmd)
        writer.write('makefile', makefile_tmp)

    # build the project
    if compile:
        with in_directory(directory):
            if compiler=='msvc':
                # TODO: handle debug
                if debug:
                    logger.warn('Debug flag currently ignored for MSVC')
                vcvars_search_paths = [
                    # futureproofing!
                    r'c:\Program Files\Microsoft Visual Studio 15.0\VC\vcvarsall.bat',
                    r'c:\Program Files (x86)\Microsoft Visual Studio 15.0\VC\vcvarsall.bat',
                    r'c:\Program Files\Microsoft Visual Studio 14.0\VC\vcvarsall.bat',
                    r'c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat',
                    r'c:\Program Files\Microsoft Visual Studio 13.0\VC\vcvarsall.bat',
                    r'c:\Program Files (x86)\Microsoft Visual Studio 13.0\VC\vcvarsall.bat',
                    r'c:\Program Files\Microsoft Visual Studio 12.0\VC\vcvarsall.bat',
                    r'c:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\vcvarsall.bat',
                    r'c:\Program Files\Microsoft Visual Studio 11.0\VC\vcvarsall.bat',
                    r'c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\vcvarsall.bat',
                    r'c:\Program Files\Microsoft Visual Studio 10.0\VC\vcvarsall.bat',
                    r'c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\vcvarsall.bat',
                    ]
                vcvars_loc = prefs['codegen.cpp.msvc_vars_location']
                if vcvars_loc=='':
                    for fname in vcvars_search_paths:
                        if os.path.exists(fname):
                            vcvars_loc = fname
                            break
                if vcvars_loc=='':
                    raise IOError("Cannot find vcvarsall.bat on standard search path.")
                # TODO: copy vcvars and make replacements for 64 bit automatically
                arch_name = prefs['codegen.cpp.msvc_architecture']
                if arch_name=='':
                    mach = platform.machine()
                    if mach=='AMD64':
                        arch_name = 'x86_amd64'
                    else:
                        arch_name = 'x86'
                vcvars_cmd = '"{vcvars_loc}" {arch_name}'.format(
                        vcvars_loc=vcvars_loc, arch_name=arch_name)
                make_cmd = 'nmake /f win_makefile'
                if os.path.exists('winmake.log'):
                    os.remove('winmake.log')
                with std_silent(debug):
                    if clean:
                        os.system('%s >>winmake.log 2>&1 && %s clean >>winmake.log 2>&1' % (vcvars_cmd, make_cmd))
                    x = os.system('%s >>winmake.log 2>&1 && %s >>winmake.log 2>&1' % (vcvars_cmd, make_cmd))
                    if x!=0:
                        raise RuntimeError("Project compilation failed")
            else:
                with std_silent(debug):
                    if clean:
                        os.system('make clean')
                    if debug:
                        x = os.system('make debug')
                    elif native:
                        x = os.system('make native')
                    else:
                        x = os.system('make')
                    if x!=0:
                        raise RuntimeError("Project compilation failed")
            if run:
                if not with_output:
                    stdout = open(os.devnull, 'w')
                else:
                    stdout = None
                if os.name=='nt':
                    x = subprocess.call(['main'] + run_args, stdout=stdout)
                else:
                    x = subprocess.call(['./main'] + run_args, stdout=stdout)
                if x:
                    raise RuntimeError("Project run failed")
                self.has_been_run = True