def compile_(self):
    """Generate, compile and load the cudamemop CUDA helper library.

    Writes the header and .cu source, invokes nvcc via
    ``CUDAGenerator.compile_``, loads the resulting ``cudamemop.so`` and
    declares the pointer return types of its exported functions.

    Raises:
        EnvironmentError: if compilation of cudamemop.so fails; ``self._so``
            is reset to ``None`` in that case.
    """
    self._generate_h()
    self._generate_cu()
    # TODO nvcc path
    if CUDAGenerator.compile_(
            src='{}/code_gen/cudamemop.cu'.format(pkg_dir),
            output='{}/so_gen/cudamemop.so'.format(pkg_dir)):
        self._so = cdll.LoadLibrary(
            '{}/so_gen/cudamemop.so'.format(pkg_dir))
        for i in self.types:
            # Without an explicit restype ctypes assumes c_int; each of
            # these functions actually returns a <type>* pointer.
            c_type = getattr(ctypes, 'c_%s' % i)
            for prefix in ('gpu_malloc_', 'to_gpu_', 'from_gpu_'):
                getattr(self._so, prefix + i).restype = POINTER(c_type)
    else:
        self._so = None
        # Fixed: message previously said "connection.data.so" (copy-paste
        # from the connection compiler) although cudamemop.so is built here.
        raise EnvironmentError('Compile file cudamemop.so failed')
def compile_(self):
    """Compile and load the connection.data CUDA library (once per class).

    The shared object is cached on the class-level ``_cls_so`` handle, so
    repeated calls reuse the already loaded library. On success the
    ``to_gpu_connection`` / ``from_gpu_connection`` return types are set to
    pointers to the connection ctype.

    Raises:
        EnvironmentError: if nvcc compilation fails.
    """
    if self._cls_so is None:
        self._generate_h()
        self._generate_data_cu()
        so_path = '{}/so_gen/connection.data.so'.format(pkg_dir)
        compiled = CUDAGenerator.compile_(
            src='{}/code_gen/connection.data.cu'.format(pkg_dir),
            output=so_path)
        if not compiled:
            self._cls_so = None
            raise EnvironmentError(
                'Compile file connection.data.so failed')
        lib = cdll.LoadLibrary(so_path)
        # Declare pointer return types; ctypes defaults to c_int otherwise.
        lib.to_gpu_connection.restype = POINTER(self._cls_ctype)
        lib.from_gpu_connection.restype = POINTER(self._cls_ctype)
        self._cls_so = lib
    self._so = self._cls_so
def compile_(self):
    """Compile and load the runtime CUDA library.

    Assembles the nvcc source list from the static runtime sources plus one
    generated ``.compute.cu`` per neuron and synapse model, compiles it into
    ``runtime.so`` and loads the result.

    Raises:
        EnvironmentError: if compilation of runtime.so fails.
    """
    self._generate_runtime()
    # Static runtime sources first, then one compute file per model.
    pieces = ['{}/c_code/runtime.cu {}/code_gen/runtime.cu'.format(
        pkg_dir, pkg_dir)]
    for model in list(self.neuron_models) + list(self.synapse_models):
        pieces.append(' {}/code_gen/{}.compute.cu '.format(
            pkg_dir, model.name.lower()))
    src = ''.join(pieces)
    if CUDAGenerator.compile_(
            src=src, output='{}/so_gen/runtime.so'.format(pkg_dir)):
        self._so = cdll.LoadLibrary('{}/so_gen/runtime.so'.format(pkg_dir))
    else:
        self._so = None
        raise EnvironmentError('Compile file runtime.so failed')
    return
def compile_(self):
    """Compile and load the data library for this model.

    Generates the header, data and compute sources, compiles the data
    source into ``<model>.so``, loads it and declares the pointer return
    types of the ``to_gpu_<model>`` / ``from_gpu_<model>`` functions.

    Raises:
        EnvironmentError: if compilation of the model .so fails.
    """
    self._generate_h()
    self._generate_data_cu()
    self.model.generate_compute_cu()
    name = self.model.name.lower()
    # TODO: compute.cu — the compute source is generated above but not yet
    # passed to nvcc here.
    # src='{}/code_gen/{}.data.cu {}/code_gen/{}.compute.cu'
    # .format(pkg_dir, name, pkg_dir, name)
    so_path = '{}/so_gen/{}.so'.format(pkg_dir, name)
    compiled = CUDAGenerator.compile_(
        src='{}/code_gen/{}.data.cu'.format(pkg_dir, name),
        output=so_path)
    if not compiled:
        self._so = None
        raise EnvironmentError(
            'Compile file {}/so_gen/{}.so failed'.format(pkg_dir, name))
    self._so = cdll.LoadLibrary(so_path)
    for prefix in ("to_gpu_", "from_gpu_"):
        # ctypes would assume c_int without an explicit restype.
        getattr(self._so, prefix + name).restype = POINTER(self.c_type)
def _generate_data_cu(self):
    """Emit connection.data.cu: CPU<->GPU copy helpers for CConnection.

    Generates two C functions, ``to_gpu_connection`` (deep-copies a
    CConnection and its index arrays to device memory) and
    ``from_gpu_connection`` (copies the struct back). Debug builds also
    emit printf tracing of pointers and lengths.
    """
    cu_gen = CUDAGenerator(
        '{}/code_gen/connection.data.cu'.format(pkg_dir))
    cu_gen.blank_line(2)
    # NOTE(review): grouping of these includes under the debug flag is
    # reconstructed from mangled formatting — confirm stdlib.h is not
    # needed unconditionally (the generated code calls malloc).
    if self.debug:
        cu_gen.include_std('stdio.h')
        cu_gen.include_std('stdlib.h')
        cu_gen.blank_line()
    cu_gen.include('../c_code/helper_cuda.h')
    cu_gen.include('connection.h')
    cu_gen.blank_line(2)
    # --- to_gpu_connection -------------------------------------------
    cu_gen.line_no_end("CConnection * to_gpu_connection(CConnection *cpu)",
                       0)
    cu_gen.open_brace()
    # Host-side staging struct holding device pointers.
    cu_gen.line(
        'CConnection * gpu = (CConnection*)malloc(sizeof(CConnection))')
    cu_gen.line('gpu->n_len = cpu->n_len')
    cu_gen.line('gpu->r_n_len = cpu->r_n_len')
    cu_gen.line('gpu->s_len = cpu->s_len')
    # Copy each index array to device memory.
    cu_gen.to_gpu(ret='gpu->delay_start', cpu='cpu->delay_start',
                  type_='int', num='cpu->n_len')
    cu_gen.to_gpu(ret='gpu->delay_num', cpu='cpu->delay_num',
                  type_='int', num='cpu->n_len')
    cu_gen.to_gpu(ret='gpu->rev_delay_start', cpu='cpu->rev_delay_start',
                  type_='int', num='cpu->r_n_len')
    cu_gen.to_gpu(ret='gpu->rev_delay_num', cpu='cpu->rev_delay_num',
                  type_='int', num='cpu->r_n_len')
    cu_gen.to_gpu(ret='gpu->rev_map2sid', cpu='cpu->rev_map2sid',
                  type_='int', num='cpu->s_len')
    # Finally copy the staging struct itself to the device.
    cu_gen.line('CConnection * ret = NULL')
    cu_gen.to_gpu(ret='ret', cpu='gpu', type_='CConnection')
    if self.debug:
        cu_gen.line(line=r'printf("GPU CConnection Pointer: %p\n", ret)')
        cu_gen.line(
            line=
            r'printf("GPU n_len: %d r_n_len: %d s_len: %d\n", gpu->n_len, gpu->r_n_len, gpu->s_len)'
        )
    cu_gen.line('return ret')
    cu_gen.close_brace()
    cu_gen.blank_line()
    # --- from_gpu_connection -----------------------------------------
    cu_gen.line_no_end(
        "CConnection * from_gpu_connection(CConnection *gpu)", 0)
    cu_gen.open_brace()
    cu_gen.from_gpu(gpu='gpu', ret='ret', type_='CConnection')
    if self.debug:
        cu_gen.line(line=r'printf("CPU CConnection Pointer: %p\n", ret)')
        cu_gen.line(
            line=
            r'printf("CPU n_len: %d r_n_len: %d s_len: %d\n", ret->n_len, ret->r_n_len, ret->s_len)'
        )
    cu_gen.line('return ret')
    cu_gen.close_brace()
    cu_gen.blank_line()
    cu_gen.close()
    return
def _generate_cu(self):
    """Emit cudamemop.cu: typed GPU memory helpers for each scalar type.

    For every type in ``self.types`` this generates five C functions:
    ``gpu_malloc_<t>``, ``cpu2gpu_<t>``, ``gpu2cpu_<t>``, ``to_gpu_<t>``
    (allocate + copy) and ``from_gpu_<t>`` (copy back).
    """
    cu_gen = CUDAGenerator('{}/code_gen/cudamemop.cu'.format(pkg_dir))
    cu_gen.blank_line(2)
    cu_gen.include("../c_code/helper_cuda.h")
    cu_gen.include("cudamemop.h")
    cu_gen.blank_line(2)
    for i in self.types:
        # <t>* gpu_malloc_<t>(int size): device allocation only.
        cu_gen.line_no_end("{}* gpu_malloc_{}(int size)".format(i, i), 0)
        cu_gen.open_brace(0)
        cu_gen.line('{} * gpu = NULL'.format(i))
        cu_gen.malloc_gpu(ret='gpu', type_=str(i), num='size')
        cu_gen.line(line='return gpu')
        cu_gen.close_brace(0)
        cu_gen.blank_line()
        # void cpu2gpu_<t>(...): copy into an existing device buffer.
        cu_gen.line_no_end(
            "void cpu2gpu_{}({} *cpu, {} *gpu, int size)".format(i, i, i),
            0)
        cu_gen.open_brace(0)
        cu_gen.cpu_to_gpu(gpu='gpu', cpu='cpu', type_=str(i), num='size')
        cu_gen.close_brace(0)
        cu_gen.blank_line()
        # void gpu2cpu_<t>(...): copy back into an existing host buffer.
        cu_gen.line_no_end(
            "void gpu2cpu_{}({} *gpu, {} *cpu, int size)".format(i, i, i),
            0)
        cu_gen.open_brace(0)
        cu_gen.gpu_to_cpu(gpu='gpu', cpu='cpu', type_=str(i), num='size')
        cu_gen.close_brace(0)
        cu_gen.blank_line()
        # <t>* to_gpu_<t>(...): allocate device buffer and copy in.
        cu_gen.line_no_end(
            "{}* to_gpu_{}({} *cpu, int size)".format(i, i, i), 0)
        cu_gen.open_brace(0)
        cu_gen.line(line='{} * gpu = NULL'.format(i))
        cu_gen.to_gpu(ret='gpu', cpu='cpu', type_=str(i), num='size')
        cu_gen.line(line='return gpu', tab=1)
        cu_gen.close_brace(0)
        cu_gen.blank_line()
        # <t>* from_gpu_<t>(...): allocate host buffer and copy out.
        cu_gen.line_no_end(
            "{}* from_gpu_{}({} *gpu, int size)".format(i, i, i), 0)
        cu_gen.open_brace(0)
        cu_gen.from_gpu(gpu='gpu', ret='cpu', type_=str(i), num='size')
        cu_gen.line(line='return cpu', tab=1)
        cu_gen.close_brace(0)
        cu_gen.blank_line()
    # NOTE(review): placement of this trailing blank line outside the loop
    # is reconstructed from mangled formatting — confirm.
    cu_gen.blank_line()
    cu_gen.close()
    return
def _generate_runtime(self):
    """Emit runtime.h and runtime.cu for the generated simulator.

    The header declares delay/weight constants, per-model kernel launch
    dimensions, the fired-table globals and each model's external device
    variables. The .cu file defines those globals and an
    ``init_runtime`` function that allocates the device tables and wires
    up per-synapse-model connection pointers via cudaMemcpyToSymbol.
    """
    # ---------------- runtime.h ----------------
    h_gen = CGenerator('{}/code_gen/runtime.h'.format(pkg_dir))
    h_gen.if_define('runtime.h')
    h_gen.blank_line(2)
    h_gen.include("connection.h")
    h_gen.blank_line(2)
    h_gen.block('const int MAX_DELAY = {};'.format(self.max_delay))
    h_gen.block('const int MIN_DELAY = {};'.format(self.min_delay))
    h_gen.block('const {} G_MAX = {};'.format(real, self.g_max))
    h_gen.block('const {} G_MIN = {};'.format(real, self.g_min))
    # Launch dimensions per neuron model (fixed block size 32).
    # neuron_nums appears to be offset by one (index i + 1) — presumably a
    # cumulative/1-based layout; confirm against where neuron_nums is built.
    for i in range(len(self.neuron_models)):
        block_size = 32
        h_gen.block("const int {}_BLOCK_SIZE = {};".format(
            self.neuron_models[i].name.upper(), int(block_size)))
        h_gen.block("const int {}_GRID_SIZE = {};".format(
            self.neuron_models[i].name.upper(),
            math.ceil(self.neuron_nums[i + 1] / block_size)))
    # Launch dimensions per synapse model (fixed block size 128).
    for i in range(len(self.synapse_models)):
        block_size = 128
        h_gen.block("const int {}_BLOCK_SIZE = {};".format(
            self.synapse_models[i].name.upper(), int(block_size)))
        h_gen.block("const int {}_GRID_SIZE = {};".format(
            self.synapse_models[i].name.upper(),
            math.ceil(self.synapse_nums[i + 1] / block_size)))
    h_gen.blank_line()
    h_gen.block("const int MAX_BLOCK_SIZE = {};".format(
        self.max_block_size))
    h_gen.block("const int FIRED_TABLE_SIZE = {};".format(self.neuron_num))
    h_gen.block('extern __device__ int * g_fired_table;')
    h_gen.block('extern __device__ int * g_fired_table_sizes;')
    for model in self.neuron_models:
        h_gen.block('extern __device__ int * g_active_{}_table;'.format(
            model.name.lower()))
        h_gen.block('extern __device__ int g_active_{}_table_size;'.format(
            model.name.lower()))
    for model in self.synapse_models:
        h_gen.block(
            'extern __device__ CConnection * g_connection_{};'.format(
                model.name.lower()))
    # Union of all models' external parameters, excluding the time var 't'.
    external = set()
    for model in self.neuron_models:
        external |= set(model.parameters['external'])
    for model in self.synapse_models:
        external |= set(model.parameters['external'])
    external -= set('t')
    for i in external:
        h_gen.block('extern __device__ {} * {};'.format(real, i))
    h_gen.blank_line()
    h_gen.block('extern "C" {')
    h_gen.block('\tvoid **init_runtime(CConnection **connections);')
    h_gen.block('}')
    h_gen.blank_line()
    h_gen.block(
        '__device__ int commit2globalTable(int *shared_buf, volatile unsigned int size, '
        'int *global_buf, int * global_size, int offset);')
    h_gen.end_if_define('runtime.h')
    h_gen.close()
    # ---------------- runtime.cu ----------------
    cu_gen = CUDAGenerator('{}/code_gen/runtime.cu'.format(pkg_dir))
    cu_gen.include_std("stdio.h")
    cu_gen.include('../c_code/helper_cuda.h')
    cu_gen.include("runtime.h")
    cu_gen.blank_line(2)
    # Definitions matching the extern declarations in runtime.h.
    cu_gen.block('__device__ int * g_fired_table;')
    cu_gen.block('__device__ int * g_fired_table_sizes;')
    for model in self.neuron_models:
        cu_gen.block('__device__ int * g_active_{}_table;'.format(
            model.name.lower()))
        cu_gen.block('__device__ int g_active_{}_table_size;'.format(
            model.name.lower()))
    for model in self.synapse_models:
        cu_gen.block('__device__ CConnection * g_connection_{};'.format(
            model.name.lower()))
    external = set()
    for model in self.neuron_models:
        external |= set(model.parameters['external'])
    for model in self.synapse_models:
        external |= set(model.parameters['external'])
    external -= set('t')
    for i in external:
        cu_gen.block('__device__ {} * {};'.format(real, i))
    cu_gen.blank_line(2)
    # init_runtime: allocate device tables, return the two host-visible
    # pointers (fired table sizes, fired table) in a malloc'd array.
    cu_gen.block('void **init_runtime(CConnection ** connections)')
    cu_gen.block('{')
    cu_gen.block('\tint zero = 0;')
    cu_gen.block('\tint *p_int = NULL;')
    cu_gen.block('\t{} *p_{} = NULL;'.format(real, real))
    cu_gen.blank_line()
    cu_gen.block(
        '\tvoid **ret = static_cast<void**>(malloc(sizeof(void*) * {}));'.
        format(2))
    cu_gen.blank_line()
    # One size slot per delay step (max_delay + 1 ring buffer).
    cu_gen.malloc_symbol(symbol='g_fired_table_sizes',
                         gpu='p_int',
                         type_='int',
                         num='{}'.format(self.max_delay + 1))
    cu_gen.block('\tret[0] = static_cast<void*>(p_int);')
    cu_gen.malloc_symbol(symbol='g_fired_table',
                         gpu='p_int',
                         type_='int',
                         num='{}'.format(self.neuron_num *
                                         (self.max_delay + 1)))
    cu_gen.block('\tret[1] = static_cast<void*>(p_int);')
    # cu_gen.block('\tprintf("\\n%p, %p, %p\\n", ret, ret[0], ret[1]);')
    cu_gen.blank_line()
    for model in self.neuron_models:
        cu_gen.cu_line(
            'cudaMemcpyToSymbol(g_active_{}_table_size, &zero, sizeof(int))'
            .format(model.name.lower()))
        cu_gen.malloc_symbol(symbol='g_active_{}_table'.format(
            model.name.lower()),
                             gpu='p_int',
                             type_='int',
                             num='{}'.format(self.neuron_num))
    cu_gen.block('\n')
    for i in external:
        cu_gen.malloc_symbol(symbol='{}'.format(i),
                             gpu='p_{}'.format(real),
                             type_=real,
                             num='{}'.format(self.neuron_num))
    # Point each synapse model's device symbol at its CConnection.
    for i, model in enumerate(self.synapse_models):
        cu_gen.cu_line(
            'cudaMemcpyToSymbol(g_connection_{}, &(connections[{}]), sizeof(CConnection*))'
            .format(model.name.lower(), i))
    cu_gen.block('\treturn ret;')
    cu_gen.block('}')
    cu_gen.close()
def generate_data_cu(self, debug=False):
    """Emit <model>.data.cu: CPU<->GPU copy helpers for this model's struct.

    Generates ``to_gpu_<model>`` (deep-copies all special/constant/variable
    parameter arrays to the device, then the struct itself) and
    ``from_gpu_<model>`` (copies the struct back).

    Args:
        debug: unused here — presumably reserved for parity with other
            generators; confirm before removing.
    """
    cu_gen = CUDAGenerator('{}/code_gen/{}.data.cu'.format(
        pkg_dir, self.name.lower()))
    cu_gen.blank_line(2)
    cu_gen.include_std('stdlib.h')
    cu_gen.blank_line()
    cu_gen.include("../c_code/helper_cuda.h")
    cu_gen.include("{}.h".format(self.name.lower()))
    cu_gen.blank_line(2)
    # --- to_gpu_<model> ----------------------------------------------
    cu_gen.line_no_end(
        "{} * to_gpu_{}({} *cpu, int num)".format(self.name.capitalize(),
                                                  self.name.lower(),
                                                  self.name.capitalize()),
        0)
    cu_gen.open_brace()
    # Host-side staging struct holding device pointers.
    cu_gen.malloc(ret='gpu', type_=self.name.capitalize())
    # Special parameters are int arrays; 'delay' is excluded — it lives in
    # the connection data, not the per-model struct.
    for i in self.parameters['special']:
        if i != 'delay':
            cu_gen.to_gpu(ret='gpu->p_{}'.format(i),
                          cpu='cpu->p_{}'.format(i),
                          num='num',
                          type_='int')
    for i in self.parameters['constant']:
        cu_gen.to_gpu(ret='gpu->p_{}'.format(i),
                      cpu='cpu->p_{}'.format(i),
                      num='num',
                      type_=real)
    for i in self.parameters['variable']:
        cu_gen.to_gpu(ret='gpu->p_{}'.format(i),
                      cpu='cpu->p_{}'.format(i),
                      num='num',
                      type_=real)
    # Copy the staging struct itself to the device.
    cu_gen.line('{} * ret = NULL'.format(self.name.capitalize()))
    cu_gen.to_gpu(ret='ret',
                  cpu='gpu',
                  num='1',
                  type_=self.name.capitalize())
    cu_gen.line('return ret')
    cu_gen.close_brace()
    cu_gen.blank_line()
    # --- from_gpu_<model> --------------------------------------------
    cu_gen.line_no_end(
        "{} * from_gpu_{}({} *gpu, int num)".format(
            self.name.capitalize(), self.name.lower(),
            self.name.capitalize()), 0)
    cu_gen.open_brace()
    cu_gen.from_gpu(ret='cpu', gpu='gpu', type_=self.name.capitalize())
    cu_gen.line('return cpu')
    cu_gen.close_brace()
    cu_gen.blank_line()
    cu_gen.close()
    return
def generate_compute_cu(self, debug=False):
    """Emit <model>.compute.cu: the CUDA kernels updating this synapse model.

    Generates a ``_clip`` device helper, an ``update_<model>_gpu`` kernel
    that walks the fired table across the delay window and accumulates
    weights into g_i_exec/g_i_inh (plus pre-synaptic STDP traces when
    ``self.pre_learn``), an optional ``learn_<model>_post`` kernel when
    ``self.post_learn``, and a host-side ``update_<model>`` launcher.

    Args:
        debug: unused here — presumably reserved for parity with other
            generators; confirm before removing.
    """
    cu_gen = CUDAGenerator('{}/code_gen/{}.compute.cu'.format(
        pkg_dir, self.name))
    cu_gen.include("runtime.h")
    cu_gen.include("{}.h".format(self.name.lower()))
    cu_gen.blank_line(2)
    # cu_gen.line("__global__ void find_{}_gpu({} *data, int num, int start_id);"
    #             .format(self.name.lower(), self.name.capitalize()), 0)
    # cu_gen.line("__global__ void update_{}_gpu({} *data, int num, int start_id, int t);"
    #             .format(self.name.lower(), self.name.capitalize()), 0)
    # cu_gen.blank_line(1)
    # --- __device__ _clip: clamp a value into [min, max] --------------
    cu_gen.block("__device__ {} _clip({} a, {} min, {} max)".format(
        real, real, real, real))
    cu_gen.block("{")
    cu_gen.block("\tif (a < min) {")
    cu_gen.block("\t\treturn min;")
    cu_gen.block("\t} else if (a > max) {")
    cu_gen.block("\t\treturn max;")
    cu_gen.block("\t} else {")
    cu_gen.block("\t\treturn a;")
    cu_gen.block("\t}")
    cu_gen.block("}")
    # --- update_<model>_gpu kernel ------------------------------------
    cu_gen.block(
        "__global__ void update_{}_gpu({} *data, int num, int start_id, int t)"
        .format(self.name.lower(), self.name.capitalize()))
    cu_gen.block("{")
    cu_gen.block("\tfor (int delta_t={}; delta_t<={}; delta_t++) {{".format(
        "MIN_DELAY", "MAX_DELAY"))
    cu_gen.block("\t\tint block_idx = blockIdx.x;")
    # Ring-buffer index into the fired table for spikes delta_t steps old.
    cu_gen.block(
        "\t\tint delay_idx = (t + {} + 1 - delta_t) % ( {} + 1);".format(
            "MAX_DELAY", "MAX_DELAY"))
    # cu_gen.block("\t\tint delay_idx = (t + {} - delta_t) % ( {} + 1);".format("MAX_DELAY", "MAX_DELAY"))
    cu_gen.block("\t\tint fired_size = g_fired_table_sizes[delay_idx];")
    # Partition fired neurons across blocks; last used block gets the tail.
    cu_gen.block("\t\tint num_per_block = (fired_size - 1) / gridDim.x + 1;")
    cu_gen.block("\t\tint block_nums_minus_1 = (fired_size - 1) / num_per_block;")
    cu_gen.block("\t\tint fired_size_block = 0;")
    cu_gen.block("\t\tif (block_idx == block_nums_minus_1) {")
    cu_gen.block("\t\tfired_size_block = fired_size - block_idx * num_per_block;")
    cu_gen.block("\t\t} else if (block_idx < block_nums_minus_1) {")
    cu_gen.block("\t\tfired_size_block = num_per_block;")
    cu_gen.block("\t\t} else {")
    cu_gen.block("\t\tfired_size_block = 0;")
    cu_gen.block("\t\t}")
    cu_gen.block("\t\tfor (int idx = 0; idx < fired_size_block; idx++) {")
    cu_gen.block(
        "\t\t\tint nid = g_fired_table[delay_idx * FIRED_TABLE_SIZE + (block_idx) * num_per_block + idx];")
    cu_gen.block(
        "\t\t\tint start_loc = g_connection_{}->delay_start[delta_t - MIN_DELAY + nid * (MAX_DELAY - MIN_DELAY + 1)];"
        .format(self.name.lower()))
    cu_gen.block(
        "\t\t\tint synapse_num = g_connection_{}->delay_num[delta_t - MIN_DELAY + nid * (MAX_DELAY - MIN_DELAY + 1)];"
        .format(self.name.lower()))
    # cu_gen.block("\t\t\tif (threadIdx.x == 0) {")
    # cu_gen.block("\t\t\t\tgLayerInput[nid]++;")
    # cu_gen.block("\t\t\t}")
    # Threads of the block stride over this neuron's outgoing synapses.
    cu_gen.block(
        "\t\t\tfor (int j=threadIdx.x; j < synapse_num; j += blockDim.x) {")
    cu_gen.block("\t\t\t\tint sid = j+start_loc;")
    cu_gen.block("\t\t\t\t{} weight = data->p_weight[sid];".format(real))
    # Positive weights are excitatory, negative inhibitory.
    cu_gen.block("\t\t\t\tif (weight >= 0) {")
    cu_gen.block("\t\t\t\t\tatomicAdd(&(g_i_exec[data->p_dst[sid]]), weight);")
    cu_gen.block("\t\t\t\t} else {")
    cu_gen.block("\t\t\t\t\tatomicAdd(&(g_i_inh[data->p_dst[sid]]), weight);")
    cu_gen.block("\t\t\t\t}")
    cu_gen.blank_line()
    if self.pre_learn:
        # Pre-synaptic STDP: decay traces since last update, bump apre,
        # and clip the adjusted weight.
        cu_gen.block(
            "\t\t\t\tdata->p_apre[sid] *= exp((data->p_last_update[sid] - t) / (data->p_tau_pre[sid]));")
        cu_gen.block(
            "\t\t\t\tdata->p_apost[sid] *= exp((data->p_last_update[sid] - t) / (data->p_tau_post[sid]));")
        cu_gen.block("")
        cu_gen.block("\t\t\t\tdata->p_apre[sid] += data->p_d_apre[sid];")
        cu_gen.block(
            "\t\t\t\tdata->p_weight[sid] = _clip(weight + data->p_apost[sid], {}, {});"
            .format("G_MIN", "G_MAX"))
        cu_gen.block("\t\t\t\tdata->p_last_update[sid] = t;")
    cu_gen.block("\t\t\t}")
    cu_gen.block("\t\t}")
    cu_gen.block("\t\t__syncthreads();")
    cu_gen.block("\t}")
    cu_gen.block("}")
    cu_gen.blank_line()
    if self.post_learn:
        # Nothing to do with delay
        # --- learn_<model>_post kernel: post-synaptic STDP over the
        # reverse (incoming) synapse map of neurons that fired now.
        cu_gen.block(
            "__global__ void learn_{}_post({} * data, int num, int start_id, int t)"
            .format(self.name.lower(), self.name.capitalize()))
        cu_gen.block("{")
        cu_gen.block("\tint block_idx = blockIdx.x;")
        cu_gen.block("\tint delay_idx = t%(MAX_DELAY+1);")
        cu_gen.block("\tint fired_size = g_fired_table_sizes[delay_idx];")
        cu_gen.block("\tint num_per_block = (fired_size - 1) / gridDim.x + 1;")
        cu_gen.block("\tint block_nums_minus_1 = (fired_size - 1) / num_per_block;")
        cu_gen.block("\tint fired_size_block = 0;")
        cu_gen.block("\tif (block_idx == block_nums_minus_1) {")
        cu_gen.block("\t\tfired_size_block = fired_size - block_idx * num_per_block;")
        cu_gen.block("\t} else if (block_idx < block_nums_minus_1) {")
        cu_gen.block("\t\tfired_size_block = num_per_block;")
        cu_gen.block("\t} else {")
        cu_gen.block("\t\tfired_size_block = 0;")
        cu_gen.block("\t}")
        cu_gen.blank_line()
        cu_gen.block("\tfor (int idx = 0; idx < fired_size_block; idx++) {")
        cu_gen.block(
            "\t\tint nid = g_fired_table[delay_idx * FIRED_TABLE_SIZE + (block_idx) * num_per_block + idx];")
        cu_gen.block(
            "\t\tint start_loc = g_connection_{}->rev_delay_start[nid];".format(
                self.name.lower()))
        cu_gen.block(
            "\t\tint synapse_num = g_connection_{}->rev_delay_num[nid];".format(
                self.name.lower()))
        cu_gen.block("\t\tfor (int j=threadIdx.x; j<synapse_num; j+=blockDim.x) {")
        cu_gen.block(
            "\t\t\tint sid = g_connection_{}->rev_map2sid[j+start_loc];".format(
                self.name.lower()))
        cu_gen.block(
            "\t\t\tdata->p_apre[sid] *= exp((data->p_last_update[sid] - t) / (data->p_tau_pre[sid]));")
        cu_gen.block(
            "\t\t\tdata->p_apost[sid] *= exp((data->p_last_update[sid] - t) / (data->p_tau_post[sid]));")
        cu_gen.block("\t\t\tdata->p_apost[sid] += data->p_d_apost[sid];")
        cu_gen.block(
            "\t\t\tdata->p_weight[sid] = _clip(data->p_weight[sid] + data->p_apre[sid], %s, %s);"
            % ("G_MIN", "G_MAX"))
        cu_gen.block("\t\t\tdata->p_last_update[sid] = t;")
        cu_gen.block("\t\t}")
        cu_gen.block("\t}")
        cu_gen.block("\t__syncthreads();")
        cu_gen.block("}")
        cu_gen.blank_line()
    # cu_gen.block("__global__ void update_{}_gpu({} *data, int num, int start_id, CConnection * connection)"
    #              .format(self.name.lower(), self.name))
    # cu_gen.block("{")
    # cu_gen.block("\tfor (int delta_t={}; delta_t<={}; delta_t++) {{".format("MIN_DELAY", "MAX_DELAY"))
    # cu_gen.block("\t\tint block_idx = blockIdx.x;")
    # cu_gen.block("\t\tint time_idx = (gCurrentIdx + {} - delta_t) % ({} + 1);".format("MAX_DELAY", "MAX_DELAY"))
    # cu_gen.block("\t\tint firedSize = g_fired_tableSizes[time_idx];")
    # cu_gen.block("\t\tint num_per_block = (firedSize - 1) / gridDim.x + 1;")
    # cu_gen.block("\t\tint block_nums_minus_1 = (firedSize - 1) / num_per_block;")
    # cu_gen.block("")
    # cu_gen.block("\t\tint fired_size_block = 0;")
    # cu_gen.block("\t\tif (block_idx == block_nums_minus_1) {")
    # cu_gen.block("\t\tfired_size_block = firedSize - block_idx * num_per_block;")
    # cu_gen.block("\t\t} else if (block_idx < block_nums_minus_1) {")
    # cu_gen.block("\t\tfired_size_block = num_per_block;")
    # cu_gen.block("\t\t} else {")
    # cu_gen.block("\t\tfired_size_block = 0;")
    # cu_gen.block("\t\t}")
    # cu_gen.blank_line()
    # cu_gen.block("\t\tfor (int idx = 0; idx < fired_size_block; idx++) {")
    # cu_gen.block("\t\t\tint nid = g_fired_table[time_idx * g_fired_tableCap + (block_idx) * num_per_block + idx];")
    # cu_gen.block("\t\t\tint start_loc = connection->rev_delayStart[delta_t + nid * MAX_DELAY];")
    # cu_gen.block("\t\t\tint synapseNum = connection->rev_delayNum[delta_t + nid * MAX_DELAY];")
    # cu_gen.block("\t\t\tif (threadIdx.x == 0) {")
    # cu_gen.block("\t\t\tgLayerInput[nid]++;")
    # cu_gen.block("\t\t\t}")
    # cu_gen.block("\t\t\tfor (int j=threadIdx.x; j < synapseNum; j += blockDim.x) {")
    # cu_gen.block("\t\t\tint sid = connection->rev_map2sid[j+start_loc];")
    # cu_gen.block("")
    # cu_gen.block("\t\t\tdata->p_apre[sid] *= exp((data->p_last_update[sid] - t) / (data->p_tau_pre[sid]));")
    # cu_gen.block("\t\t\tdata->p_apost[sid] *= exp((data->p_last_update[sid] - t) / (data->p_tau_post[sid]));")
    # cu_gen.block("\t\t\tdata->p_apost[sid] += data->p_d_apost[sid];")
    # cu_gen.block("\t\t\tdata->p_weight[sid] = _clip(weight + data->p_pre[sid], %s, %s);" % ("gMin", "gMax"))
    # cu_gen.block("\t\t\tdata->p_last_update[sid] = gCurrentCycle;")
    # cu_gen.block("\t\t\t}")
    # cu_gen.block("\t\t}")
    # cu_gen.block("\t\t__syncthreads();")
    # cu_gen.block("\t}")
    # cu_gen.block("}")
    # cu_gen.blank_line()
    # --- host-side launcher update_<model> ----------------------------
    cu_gen.block("void update_{}({} *data, int num, int start_id, int t)"
                 .format(self.name.lower(), self.name.capitalize()))
    cu_gen.block("{")
    # cu_gen.block("\t\tfind_%s_gpu<<<size=>gridSize, size->blockSize>>>((%s*)data, num, start_id);" %
    #              (self.name.lower(), self.name.capitalize()))
    cu_gen.block(
        "\tupdate_{}_gpu<<<{}_GRID_SIZE, {}_BLOCK_SIZE>>>(({}*)data, num, start_id, t);"
        .format(self.name.lower(), self.name.upper(), self.name.upper(),
                self.name.capitalize()))
    if self.post_learn:
        cu_gen.block(
            "\tlearn_{}_post<<<{}_GRID_SIZE, {}_BLOCK_SIZE>>>(({}*)data, num, start_id, t);"
            .format(self.name.lower(), self.name.upper(), self.name.upper(),
                    self.name.capitalize()))
    cu_gen.block("}")
    cu_gen.blank_line()
    cu_gen.close()
    return