def run(self, in_array):
    """ Do numerical integration """
    assert in_array.ndim == 4
    if self.input_shape is None:
        self.input_shape = in_array.shape
    self._sanity_check()
    self._prepare_kernels()
    self._prepare_tensors()

    SSN = self.parameters.ssn
    SSF = self.parameters.ssf
    ETA = self.parameters.eta
    TAU = self.parameters.tau
    EPSILON = self.parameters.epsilon
    SIGMA = self.parameters.sigma
    DELTA = self.parameters.delta
    GAMMA = self.parameters.gamma
    ALPHA = self.parameters.alpha
    BETA = self.parameters.beta
    ZETA = self.parameters.zeta
    OMEGA = self.parameters.omega
    XI = self.parameters.xi

    self.strides = [1, 1, 1, 1]
    tf_eta = tf.get_variable(
        name='h/eta', dtype=self.floatXtf,
        initializer=np.array(
            self.stepsize * 1.0 / ETA).astype(self.floatXnp))
    tf_eps_eta = tf.get_variable(
        name='eps_h/eta', dtype=self.floatXtf,
        initializer=np.array(
            1.0 - EPSILON**2 * self.stepsize * 1.0 / ETA).astype(self.floatXnp))
    tf_tau = tf.get_variable(
        name='h/tau', dtype=self.floatXtf,
        initializer=np.array(
            self.stepsize * 1.0 / TAU).astype(self.floatXnp))
    tf_sig_tau = tf.get_variable(
        name='sig_h/tau', dtype=self.floatXtf,
        initializer=np.array(
            1.0 - SIGMA**2 * self.stepsize * 1.0 / TAU).astype(self.floatXnp))

    # load copies of input into GPU
    self.X = tf.placeholder(
        name='input', dtype=self.floatXtf, shape=self.input_shape)

    if self.verbose:
        pbar = pb(self.maxiter, 'Building graph on [GPU]')

    # Using run_reference implementation
    self.O = tf.identity(self.X)
    self.I = tf.identity(self.X)
    for idx in range(self.maxiter):
        U = tf.nn.conv2d(self.O, self._gpu_u, self.strides, padding='SAME')
        T = tf.nn.conv2d(self.O, self._gpu_t, self.strides, padding='SAME')
        P = tf.nn.conv2d(self.I, self._gpu_p, self.strides, padding='SAME')
        Q = tf.nn.conv2d(self.I, self._gpu_q, self.strides, padding='SAME')
        I_summand = tf.nn.relu(
            (XI * self.X)
            - ((ALPHA * self.I + self.parameters.mu) * U)
            - ((BETA * self.I + self.parameters.nu) * T))
        self.I = tf_eps_eta * self.I + tf_eta * I_summand
        O_summand = tf.nn.relu(ZETA * self.I + GAMMA * P + DELTA * Q)
        self.O = tf_sig_tau * self.O + tf_tau * O_summand
        if self.verbose:
            pbar.update(idx)
    self.out_O = self.O
    self.out_I = self.I
    if self.verbose:
        pbar.finish()
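
###########################################################################
# Illustrative usage sketch for the TF graph-builder above (not part of
# the original module). Only run(), X, out_O and out_I come from the code
# above; the `circuit` object and its construction are assumptions. Shown
# with the TF1-style Session API that tf.placeholder implies.
###########################################################################
def _demo_run_tf(circuit, in_array):
    """Build the unrolled graph once, then evaluate both state tensors.

    `circuit` is any object exposing the run() method defined above;
    `in_array` is a 4D float array matching circuit.input_shape.
    """
    import tensorflow as tf
    circuit.run(in_array)  # builds the graph and creates the step variables
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        O, I = sess.run([circuit.out_O, circuit.out_I],
                        feed_dict={circuit.X: in_array})
    return O, I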
def create_stims(extra_vars):
    extra_vars = Bunch(extra_vars)
    out_dir = re.split(r'\.', extra_vars.f4_stimuli_file)[0]
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    #################
    nc = len(extra_vars._DEFAULT_KW2015_SO_PARAMETERS['selected_channels'])

    # build stimuli
    ###############
    all_hues_dkls = sp.linspace(
        0.0, 2 * sp.pi, extra_vars.n_train, endpoint=False)
    test_hues_dklS = all_hues_dkls[
        ::extra_vars.n_train // extra_vars.n_t_hues][:extra_vars.n_t_hues]
    surr_hues_dklS = test_hues_dklS[
        ::extra_vars.n_t_hues // extra_vars.n_s_hues][:extra_vars.n_s_hues]
    # sp.linspace(0.0, 2*sp.pi, n_s_hues, endpoint=False)
    test_sat_dklS = 0.2
    surr_sat_dklS = 0.16
    isolum_el = 0.0  # elevation is 0 to get isolumination to background

    stims_all_lms = sp.zeros(
        (extra_vars.n_train, extra_vars.size, extra_vars.size, 3))
    stims_ind_lms = sp.zeros(
        (extra_vars.n_t_hues, extra_vars.n_s_hues,
         extra_vars.size, extra_vars.size, 3))

    pbar_counter = 0
    pbar = pb((extra_vars.n_s_hues + 1) * extra_vars.n_train,
              'Building isoluminant stimuli [all]')
    for i, azt in enumerate(all_hues_dkls):
        dklS_ = sp.array([test_sat_dklS, azt, isolum_el])
        c_lms_ = dklC2lms(sph2cart(dklS_))
        stims_all_lms[i, ..., 0] = stim.get_center_surround(
            size=extra_vars.size, csize=extra_vars.csize,
            cval=c_lms_[0], sval=gray_lms[0])
        stims_all_lms[i, ..., 1] = stim.get_center_surround(
            size=extra_vars.size, csize=extra_vars.csize,
            cval=c_lms_[1], sval=gray_lms[1])
        stims_all_lms[i, ..., 2] = stim.get_center_surround(
            size=extra_vars.size, csize=extra_vars.csize,
            cval=c_lms_[2], sval=gray_lms[2])
        pbar_counter += 1
        pbar.update(pbar_counter)
    pbar.finish()

    pbar_counter = 0
    pbar = pb((extra_vars.n_s_hues + 1) * extra_vars.n_t_hues,
              'Building isoluminant stimuli [ind]')
    for i, azt in enumerate(test_hues_dklS):
        dklS_ = sp.array([test_sat_dklS, azt, isolum_el])
        c_lms_ = dklC2lms(sph2cart(dklS_))
        for j, azs in enumerate(surr_hues_dklS):
            dklS_ = sp.array([surr_sat_dklS, azs, isolum_el])
            s_lms_ = dklC2lms(sph2cart(dklS_))
            stims_ind_lms[i, j, ..., 0] = stim.get_center_surround(
                size=extra_vars.size, csize=extra_vars.csize,
                cval=c_lms_[0], sval=s_lms_[0])
            stims_ind_lms[i, j, ..., 1] = stim.get_center_surround(
                size=extra_vars.size, csize=extra_vars.csize,
                cval=c_lms_[1], sval=s_lms_[1])
            stims_ind_lms[i, j, ..., 2] = stim.get_center_surround(
                size=extra_vars.size, csize=extra_vars.csize,
                cval=c_lms_[2], sval=s_lms_[2])
            pbar_counter += 1
            pbar.update(pbar_counter)
    pbar.finish()

    # compute vanilla SO features for those stimuli
    ###############################################
    so_all = sp.zeros(
        (extra_vars.n_train, nc, extra_vars.size, extra_vars.size))
    so_ind = sp.zeros(
        (extra_vars.n_t_hues, extra_vars.n_s_hues, nc,
         extra_vars.size, extra_vars.size))

    pbar = pb(extra_vars.n_train, 'Computing SO features [all]')
    for idx in range(extra_vars.n_train):
        so_all[idx] = GET_SO(stims_all_lms[idx],
                             extra_vars._DEFAULT_FLOATX_NP,
                             extra_vars._DEFAULT_KW2015_SO_PARAMETERS)
        pbar.update(idx)
    pbar.finish()

    pbar = pb(extra_vars.n_t_hues * extra_vars.n_s_hues,
              'Computing SO features [ind]')
    for idx in range(extra_vars.n_t_hues):
        for jdx in range(extra_vars.n_s_hues):
            so_ind[idx, jdx] = GET_SO(
                stims_ind_lms[idx, jdx],
                extra_vars._DEFAULT_FLOATX_NP,
                extra_vars._DEFAULT_KW2015_SO_PARAMETERS)
            pbar.update(jdx + idx * extra_vars.n_s_hues)
    pbar.finish()
    so_ind = so_ind.reshape(
        extra_vars.n_t_hues * extra_vars.n_s_hues, nc,
        extra_vars.size, extra_vars.size)

    # Final ops
    cs_hue_diff = da2ha(
        test_hues_dklS.reshape(extra_vars.n_t_hues, 1)
        - surr_hues_dklS.reshape(1, extra_vars.n_s_hues))
    cs_hue_diff *= (180 / sp.pi)
    np.savez(extra_vars.f4_stimuli_file,
             so_all=so_all,
             so_ind=so_ind,
             stims_all_lms=stims_all_lms,
             cs_hue_diff=cs_hue_diff)
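
###########################################################################
# Worked example (illustrative only) of the hue-subsampling pattern used
# in create_stims: test hues take every (n_train // n_t_hues)-th entry of
# the full hue circle, and surround hues subsample the test hues again.
# The concrete counts below are assumptions, not values from the source.
###########################################################################
def _demo_hue_subsampling(n_train=16, n_t_hues=8, n_s_hues=4):
    import numpy as np
    all_hues = np.linspace(0.0, 2 * np.pi, n_train, endpoint=False)
    test_hues = all_hues[::n_train // n_t_hues][:n_t_hues]
    surr_hues = test_hues[::n_t_hues // n_s_hues][:n_s_hues]
    # the two subsampled sets stay aligned to the same grid, so every
    # surround hue is also a test hue (center/surround pairs share axes)
    assert np.all(np.isin(surr_hues, test_hues))
    return all_hues, test_hues, surr_hues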
def run_reference(afferent,
                  p=_DEFAULT_PARAMETERS,
                  axis=-3,
                  maxiter=_DEFAULT_MAXITER,
                  h=_DEFAULT_STEPSIZE,
                  keeptime=_DEFAULT_KEEPTIME,
                  verbose=_DEFAULT_VERBOSE):
    """ Integrate with Forward Euler method with integration step size h """

    ######################################
    # re-arrange array into canonical form
    ######################################
    axis = axis % afferent.ndim
    O, initsz, nunits = to4(afferent, axis=axis)
    I, O_t, I_t = O.copy(), [], []
    if keeptime:
        O_t.append(O)
        I_t.append(I)

    ############
    # parameters
    ############
    p = _DEFAULT_PARAMETERS if p is None else p
    sigma, tau = p['sigma'], p['tau']
    epsilon, eta = p['epsilon'], p['eta']
    ssc, sss = p['ssc'], p['sss']
    gamma, alpha, mu = p['gamma'], p['alpha'], p['mu']
    delta, beta, nu = p['delta'], p['beta'], p['nu']
    xi, zeta, omega = p['xi'], p['zeta'], p['omega']

    ##############################################
    # make sure pool sizes, input sizes make sense
    ##############################################
    assert sss < afferent.shape[-2]
    assert sss < afferent.shape[-1]
    assert ssc < sss
    assert sss % 2 == 1
    assert ssc % 2 == 1
    tuned_pooling_method = 'mean'    # 'max'
    untuned_pooling_method = 'mean'  # 'max'

    #################################
    # tuned summation: center pooling
    #################################
    zeta = 1.0  # because here unlike the GPU implementation we exclude center
    # and we pulled the default parameters from GPU implementation
    pool_P = {
        'type': 'pool',
        'mode': tuned_pooling_method,
        'size': (1, 1, ssc, ssc),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': (1, 1),
        'subpool': {
            'type': None
        },
    }

    ####################################################
    # untuned suppression: reduction across feature axis
    ####################################################
    pool_U = {
        'type': 'pool',
        'mode': untuned_pooling_method,
        'size': (1, -1, 1, 1),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': None,
    }

    #####################################
    # tuned suppression: surround pooling
    #####################################
    pool_T = {
        'type': 'pool',
        'mode': tuned_pooling_method,
        'size': (1, 1, sss, sss),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': (ssc, ssc),
        'subpool': {
            'type': None
        },
    }

    ########################
    # untuned summation: cRF
    ########################
    V = sp.linspace(0.0, 1.0, nunits)
    W = stats.norm.pdf(V, loc=V[nunits // 2], scale=omega)
    W /= W.sum()
    pool_Q = {
        'type': 'conv',
        'fb': W,
        'padding': 'wrap',
        'im_dims': 'ndhw',
        'fb_dims': 'd',
        'corr': False
    }

    ###################
    # pooling functions
    ###################
    untuned_suppression = lambda arr: recursive_pool(
        arr, params=pool_U, keyname='subpool', verbose=False)
    tuned_suppression = lambda arr: recursive_pool(
        arr, params=pool_T, keyname='subpool', verbose=False)
    tuned_summation = lambda arr: recursive_pool(
        arr, params=pool_P, keyname='subpool', verbose=False)
    untuned_summation = lambda arr: recursive_pool(
        arr, params=pool_Q, keyname='subpool', verbose=False)
    relu = lambda x: hwrectify(x, '+')
    # relu = lambda x: softplus(x, 10.0)

    ###################################################
    # iterate lateral connections and store time frames
    ###################################################
    if verbose:
        pbar = pb(maxiter, 'Integrating [HOST]')
    for i in range(maxiter):
        U = untuned_suppression(O)
        T = tuned_suppression(O)
        P = tuned_summation(I)
        Q = untuned_summation(I)

        I_summand = relu(
            xi * afferent - (alpha * I + mu) * U - (beta * I + nu) * T)
        I = (1. - epsilon**2 * h / eta) * I + h / eta * I_summand

        O_summand = relu(zeta * I + gamma * P + delta * Q)
        O = (1. - sigma**2 * h / tau) * O + h / tau * O_summand

        if keeptime:
            I_t.append(I)
            O_t.append(O)
        if verbose:
            pbar.update(i)
    if verbose:
        pbar.finish()

    ################
    # postprocessing
    ################
    out_I = from4(I_t if keeptime else I,
                  axis=axis, keeptime=keeptime, size=initsz)
    out_O = from4(O_t if keeptime else O,
                  axis=axis, keeptime=keeptime, size=initsz)
    afferent.shape = initsz

    return out_I, out_O
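
###########################################################################
# Minimal usage sketch for run_reference (illustrative; the input shape
# and iteration count are assumptions). The feature axis sits at `axis`
# (default -3), and both spatial dims must exceed the surround pool size
# p['sss'] for the asserts above to pass.
###########################################################################
def _demo_run_reference():
    import numpy as np
    afferent = np.random.rand(32, 64, 64)  # (features, height, width)
    out_I, out_O = run_reference(
        afferent, maxiter=10, keeptime=False, verbose=False)
    # with keeptime=False the outputs should match the input shape;
    # with keeptime=True each output would also carry the stored frames
    return out_I, out_O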
def run_all_interneurons(afferent,
                         axis=-3,
                         maxiter=50,
                         h=1.,
                         keeptime=True,
                         verbose=True):
    """ Integrate with Forward Euler method with integration step size h """

    ######################################
    # re-arrange array into canonical form
    ######################################
    axis = axis % afferent.ndim
    Pyr, initsz, nunits = to4(afferent, axis=axis)
    Pyr_t = []
    # base_shape = Pyr.shape
    # intr_shape = Pyr.shape
    # intr_shape[-3] = 1
    # Som, Som_t = sp.zeros(base_shape), []
    # Pvb, Pvb_t = sp.zeros(intr_shape), []
    # Vip, Vip_t = sp.zeros(intr_shape), []
    if keeptime:
        Pyr_t.append(Pyr)
        # Som_t.append(Som)
        # Pvb_t.append(Pvb)
        # Vip_t.append(Vip)

    ############
    # parameters
    ############
    ssc = 9
    sss = 29
    tau = 5.00
    sigma = 1.00
    omega = 0.15
    k_FF_Pyr = 2.00
    k_SE_Pyr = 1.90
    k_SI_Pyr = 2.00
    k_HE_Pyr = 1.00
    k_HI_Pyr = 3.00

    ##############################################
    # make sure pool sizes, input sizes make sense
    ##############################################
    assert sss < afferent.shape[-2]
    assert sss < afferent.shape[-1]
    assert ssc < sss
    assert sss % 2 == 1
    assert ssc % 2 == 1
    tuned_pooling_method = 'mean'    # 'max'
    untuned_pooling_method = 'mean'  # 'max'

    #################################
    # tuned summation: center pooling
    #################################
    pool_P = {
        'type': 'pool',
        'mode': tuned_pooling_method,
        'size': (1, 1, ssc, ssc),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': (1, 1),
        'subpool': {
            'type': None
        },
    }

    ####################################################
    # untuned suppression: reduction across feature axis
    ####################################################
    pool_U = {
        'type': 'pool',
        'mode': untuned_pooling_method,
        'size': (1, -1, 1, 1),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': None,
    }

    #####################################
    # tuned suppression: surround pooling
    #####################################
    pool_T = {
        'type': 'pool',
        'mode': tuned_pooling_method,
        'size': (1, 1, sss, sss),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': None,  # (ssc, ssc),
        'subpool': {
            'type': None
        },
    }

    ########################
    # untuned summation: cRF
    ########################
    V = sp.linspace(0.0, 1.0, nunits)
    W = stats.norm.pdf(V, loc=V[nunits // 2], scale=omega)
    W /= W.sum()
    pool_Q = {
        'type': 'conv',
        'fb': W,
        'padding': 'wrap',
        'im_dims': 'ndhw',
        'fb_dims': 'd',
        'corr': False
    }

    ###################
    # pooling functions
    ###################
    untuned_suppression = lambda arr: recursive_pool(
        arr, params=pool_U, keyname='subpool', verbose=False)
    tuned_suppression = lambda arr: recursive_pool(
        arr, params=pool_T, keyname='subpool', verbose=False)
    tuned_summation = lambda arr: recursive_pool(
        arr, params=pool_P, keyname='subpool', verbose=False)
    untuned_summation = lambda arr: recursive_pool(
        arr, params=pool_Q, keyname='subpool', verbose=False)
    relu = lambda x: hwrectify(x, '+')

    ###################################################
    # iterate lateral connections and store time frames
    ###################################################
    if verbose:
        pbar = pb(maxiter, 'Integrating [HOST]')
    for i in range(maxiter):
        U = untuned_suppression(Pyr)
        T = tuned_suppression(Pyr)
        P = tuned_summation(Pyr)
        Q = untuned_summation(Pyr)

        Pyr_dendritic = relu(k_FF_Pyr * afferent
                             + k_HE_Pyr * Q
                             + k_SE_Pyr * P
                             - k_SI_Pyr * T)
        Pyr_summand = relu(Pyr_dendritic - k_HI_Pyr * U)
        Pyr = (1. - sigma**2 * h / tau) * Pyr + h / tau * Pyr_summand

        if keeptime:
            Pyr_t.append(Pyr)
        if verbose:
            pbar.update(i)
    if verbose:
        pbar.finish()

    ################
    # postprocessing
    ################
    out_Pyr = from4(Pyr_t if keeptime else Pyr,
                    axis=axis, keeptime=keeptime, size=initsz)
    afferent.shape = initsz

    return out_Pyr
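
###########################################################################
# All of the integrators in this module share the same leaky Forward
# Euler update,
#     x[t+1] = (1 - lambda**2 * h / tau) * x[t] + (h / tau) * relu(drive),
# factored here into a tiny standalone helper for illustration (this
# helper is an assumption for exposition, not a refactor in the source).
###########################################################################
def _euler_step(x, drive, h, tau, leak):
    """One leaky, rectified Forward Euler step (illustrative sketch)."""
    import numpy as np
    return (1. - leak**2 * h / tau) * x + (h / tau) * np.maximum(drive, 0.)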
def run(afferent,
        p=_DEFAULT_PARAMETERS_new,
        axis=-3,
        maxiter=_DEFAULT_MAXITER_new,
        h=_DEFAULT_STEPSIZE_new,
        keeptime=_DEFAULT_KEEPTIME,
        verbose=_DEFAULT_VERBOSE):
    """ Integrate with Forward Euler method with integration step size h """

    ######################################
    # re-arrange array into canonical form
    ######################################
    axis = axis % afferent.ndim
    Pyr, initsz, nunits = to4(afferent, axis=axis)
    Int = Pyr.copy()
    Pyr_t = []
    Int_t = []
    if keeptime:
        Pyr_t.append(Pyr)
        Int_t.append(Int)

    ############
    # parameters
    ############
    # fall back to the same defaults as the signature; this run() also
    # reads 'phi' and 'psi', which the new parameter set provides
    p = _DEFAULT_PARAMETERS_new if p is None else p
    sigma, tau = p['sigma'], p['tau']
    epsilon, eta = p['epsilon'], p['eta']
    ssc, sss = p['ssc'], p['sss']
    gamma, alpha, mu = p['gamma'], p['alpha'], p['mu']
    delta, beta, nu = p['delta'], p['beta'], p['nu']
    xi, zeta, omega = p['xi'], p['zeta'], p['omega']
    phi, psi = p['phi'], p['psi']

    ##############################################
    # make sure pool sizes, input sizes make sense
    ##############################################
    assert sss < afferent.shape[-2]
    assert sss < afferent.shape[-1]
    assert ssc < sss
    assert sss % 2 == 1
    assert ssc % 2 == 1
    tuned_pooling_method = 'mean'    # 'max'
    untuned_pooling_method = 'mean'  # 'max'

    #################################
    # tuned summation: center pooling
    #################################
    pool_P = {
        'type': 'pool',
        'mode': tuned_pooling_method,
        'size': (1, 1, ssc, ssc),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': (1, 1),
        'subpool': {
            'type': None
        },
    }

    ####################################################
    # untuned suppression: reduction across feature axis
    ####################################################
    pool_U = {
        'type': 'pool',
        'mode': untuned_pooling_method,
        'size': (1, -1, 1, 1),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': None,
    }

    #####################################
    # tuned suppression: surround pooling
    #####################################
    pool_T = {
        'type': 'pool',
        'mode': tuned_pooling_method,
        'size': (1, 1, sss, sss),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': (1, 1),  # (ssc, ssc),
        'subpool': {
            'type': None
        },
    }

    ########################
    # untuned summation: cRF
    ########################
    V = sp.linspace(0.0, 1.0, nunits)
    W = stats.norm.pdf(V, loc=V[nunits // 2], scale=omega)
    W /= W.sum()
    pool_Q = {
        'type': 'conv',
        'fb': W,
        'padding': 'wrap',
        'im_dims': 'ndhw',
        'fb_dims': 'd',
        'corr': False
    }

    ###################
    # pooling functions
    ###################
    untuned_suppression = lambda arr: recursive_pool(
        arr, params=pool_U, keyname='subpool', verbose=False)
    tuned_suppression = lambda arr: recursive_pool(
        arr, params=pool_T, keyname='subpool', verbose=False)
    tuned_summation = lambda arr: recursive_pool(
        arr, params=pool_P, keyname='subpool', verbose=False)
    untuned_summation = lambda arr: recursive_pool(
        arr, params=pool_Q, keyname='subpool', verbose=False)
    relu = lambda x: hwrectify(x, '+')
    # relu = lambda x: softplus(x, 10.0)

    ###################################################
    # iterate lateral connections and store time frames
    ###################################################
    if verbose:
        pbar = pb(maxiter, 'Integrating [HOST]')
    for i in range(maxiter):
        U = untuned_suppression(Pyr)
        T = tuned_suppression(Pyr)
        P = tuned_summation(Pyr)
        Q = untuned_summation(Pyr)

        Int_summand = relu(zeta * Pyr
                           + alpha * U
                           + beta * T
                           - psi ** 2)
        Int = (1. - epsilon**2 * h / eta) * Int + h / eta * Int_summand

        Pyr_summand = relu(xi * afferent
                           # + 0.25 * tuned_summation(afferent)
                           + gamma * P
                           + delta * Q
                           - (mu * Pyr + nu) * Int
                           - phi ** 2)
        Pyr = (1. - sigma**2 * h / tau) * Pyr + h / tau * Pyr_summand

        if keeptime:
            Int_t.append(Int)
            Pyr_t.append(Pyr)
        if verbose:
            pbar.update(i)
    if verbose:
        pbar.finish()

    ################
    # postprocessing
    ################
    out_Int = from4(Int_t if keeptime else Int,
                    axis=axis, keeptime=keeptime, size=initsz)
    out_Pyr = from4(Pyr_t if keeptime else Pyr,
                    axis=axis, keeptime=keeptime, size=initsz)
    afferent.shape = initsz

    return out_Int, out_Pyr
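
###########################################################################
# Minimal usage sketch for run() above (illustrative; the input shape and
# iteration count are assumptions, and the default parameter dict must
# satisfy the pool-size asserts for the chosen spatial dims). With
# keeptime=True the stored frames are the initial state plus one frame
# per iteration.
###########################################################################
def _demo_run():
    import numpy as np
    afferent = np.random.rand(32, 64, 64)  # (features, height, width)
    out_Int, out_Pyr = run(afferent, maxiter=5, keeptime=True,
                           verbose=False)
    return out_Int, out_Pyr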
def run(self, in_array, from_gpu=True):
    """ Do numerical integration """
    assert in_array.ndim == 4
    if self.input_shape is None:
        self.input_shape = in_array.shape
    self._sanity_check()
    self._prepare_kernels()
    self._prepare_tensors()

    SSN = self.parameters.ssn
    SSF = self.parameters.ssf
    ETA = self.parameters.eta
    TAU = self.parameters.tau
    EPSILON = self.parameters.epsilon
    SIGMA = self.parameters.sigma
    DELTA = self.parameters.delta
    GAMMA = self.parameters.gamma
    ALPHA = self.parameters.alpha
    BETA = self.parameters.beta
    ZETA = self.parameters.zeta
    OMEGA = self.parameters.omega
    XI = self.parameters.xi

    # load copies of input into GPU
    self._gpu_input = _array2gpu(in_array)
    cucopy(self.cublasContext, self._gpu_input.size,
           self._gpu_input.gpudata, 1, self.X.gpudata, 1)
    self.cudaContext.synchronize()
    cucopy(self.cublasContext, self._gpu_input.size,
           self._gpu_input.gpudata, 1, self.Y.gpudata, 1)
    self.cudaContext.synchronize()

    if self.keeptime:
        try:
            self.X_t[0] = in_array.get()
            self.Y_t[0] = in_array.get()
        except AttributeError:
            self.X_t[0] = in_array
            self.Y_t[0] = in_array

    # create a bunch of pointers for cuDNN
    X__ptr__ = _gpuarray2ptr(self.X)
    Y__ptr__ = _gpuarray2ptr(self.Y)
    u__ptr__ = _gpuarray2ptr(self._gpu_u)
    p__ptr__ = _gpuarray2ptr(self._gpu_p)
    t__ptr__ = _gpuarray2ptr(self._gpu_t)
    buf1__ptr__ = _gpuarray2ptr(self._gpu_buf1)
    buf2__ptr__ = _gpuarray2ptr(self._gpu_buf2)
    buf3__ptr__ = _gpuarray2ptr(self._gpu_buf3)
    buf4__ptr__ = _gpuarray2ptr(self._gpu_buf4)
    if self.parameters.omega:
        q__ptr__ = _gpuarray2ptr(self._gpu_q)

    if self.verbose:
        pbar = pb(self.maxiter, 'Integrating [GPU:%i]' % (self.i_gpu,))

    for idx in range(self.maxiter):
        # [-(alpha*X+mu) -> B2] <<<PASS>>>
        ###########################################################
        cucopy(self.cublasContext, self.X.size,
               self.X.gpudata, 1, self._gpu_buf2.gpudata, 1)
        self.cudaContext.synchronize()
        cuscal(self.cublasContext, self._gpu_buf2.size,
               -ALPHA, self._gpu_buf2.gpudata, 1)
        self.cudaContext.synchronize()
        self._bcastbias_cuda(self._gpu_buf2, self._gpu_negMU,
                             block=self._bl, grid=self._gr)
        self.cudaContext.synchronize()

        # [compute(U); U -> B4] <<<PASS:max|ERR|<1e-7>>>
        ###########################################################
        cudnn.cudnnConvolutionForward(self.cudnnContext, 1.0,
                                      self._desc_Y, Y__ptr__,
                                      self._desc_u, u__ptr__,
                                      self._desc_U, self._algo_u,
                                      None, 0, 0.0,
                                      self._desc_buf4, buf4__ptr__,
                                      self._cudnn_data_type)
        self.cudaContext.synchronize()

        # [B2 *= B4 := U] <<<PASS>>>
        ###########################################################
        self._bcastmul_cuda(self._gpu_buf2, self._gpu_buf4,
                            block=self._bl, grid=self._gr)
        self.cudaContext.synchronize()
        if self.keepvars:
            self.U_t[idx] = -1.0 * self._gpu_buf2.get()

        # [XI * L -> B1] <<<PASS>>>
        ###########################################################
        cucopy(self.cublasContext, self._gpu_input.size,
               self._gpu_input.gpudata, 1, self._gpu_buf1.gpudata, 1)
        self.cudaContext.synchronize()
        cuscal(self.cublasContext, self._gpu_buf1.size,
               XI, self._gpu_buf1.gpudata, 1)
        self.cudaContext.synchronize()

        ###########################################################
        ###########################################################
        # import warnings
        # warnings.warn('Shunting inhibition introduced ' + \
        #     'as an experimental feature!!!')
        # cucopy(self.cublasContext, self.X.size,
        #     self.X.gpudata, 1, self._gpu_buf3.gpudata, 1)
        # self.cudaContext.synchronize()
        # self._pwisemul_cuda(self._gpu_buf3,
        #     self._gpu_input, block=self._bl, grid=self._gr)
        # self.cudaContext.synchronize()
        # cuscal(self.cublasContext, self._gpu_buf3.size,
        #     -XI*0.5, self._gpu_buf3.gpudata, 1)
        # self.cudaContext.synchronize()
        # cuaxpy(self.cublasContext, self._gpu_buf3.size, 1.0,
        #     self._gpu_buf3.gpudata, 1, self._gpu_buf1.gpudata, 1)
        # self.cudaContext.synchronize()
        ###########################################################
        ###########################################################

        # [B1 += B2 := -(alpha*X+mu).*U] <<<PASS>>>
        ###########################################################
        cuaxpy(self.cublasContext, self._gpu_buf2.size, 1.0,
               self._gpu_buf2.gpudata, 1, self._gpu_buf1.gpudata, 1)
        self.cudaContext.synchronize()

        # [-(beta*X+nu) -> B2] <<<PASS>>>
        ###########################################################
        cucopy(self.cublasContext, self.X.size,
               self.X.gpudata, 1, self._gpu_buf2.gpudata, 1)
        self.cudaContext.synchronize()
        cuscal(self.cublasContext, self._gpu_buf2.size,
               -BETA, self._gpu_buf2.gpudata, 1)
        self.cudaContext.synchronize()
        self._bcastbias_cuda(self._gpu_buf2, self._gpu_negNU,
                             block=self._bl, grid=self._gr)
        self.cudaContext.synchronize()

        # [T<excluding_center> -> B3] <<<PASS:max|ERR|<1e-3,avg=1e-5>>>
        ###########################################################
        cudnn.cudnnConvolutionForward(self.cudnnContext, 1.0,
                                      self._desc_Y, Y__ptr__,
                                      self._desc_t, t__ptr__,
                                      self._desc_T, self._algo_t,
                                      None, 0, 0.0,
                                      self._desc_buf3, buf3__ptr__,
                                      self._cudnn_data_type)
        self.cudaContext.synchronize()

        # [B2 *= B3 := T] <<<PASS>>>
        ###########################################################
        self._pwisemul_cuda(self._gpu_buf2, self._gpu_buf3,
                            block=self._bl, grid=self._gr)
        self.cudaContext.synchronize()
        if self.keepvars:
            self.T_t[idx] = -1.0 * self._gpu_buf2.get()

        # [B1 += B2 := -(beta*X+nu).*T] <<<PASS>>>
        ###########################################################
        cuaxpy(self.cublasContext, self._gpu_buf2.size, 1.0,
               self._gpu_buf2.gpudata, 1, self._gpu_buf1.gpudata, 1)
        self.cudaContext.synchronize()

        # [now B1 := X_summand; rectify(B1) -> B2] <<<PASS>>>
        ###########################################################
        cudnn.cudnnActivationForward(self.cudnnContext,
                                     self._cudnn_relu_act, 1.0,
                                     self._desc_buf1, buf1__ptr__, 0.0,
                                     self._desc_buf2, buf2__ptr__)
        self.cudaContext.synchronize()

        # [B2 *= h/eta] <<<PASS>>>
        ###########################################################
        cuscal(self.cublasContext, self._gpu_buf2.size,
               self.stepsize * 1.0 / ETA, self._gpu_buf2.gpudata, 1)
        self.cudaContext.synchronize()

        # [X *= (1-epsilon**2 * h/eta)] <<<PASS>>>
        ###########################################################
        cuscal(self.cublasContext, self.X.size,
               (1.0 - EPSILON**2 * self.stepsize * 1.0 / ETA),
               self.X.gpudata, 1)
        self.cudaContext.synchronize()

        # [X += B2 := h/eta * X_summand] <<<PASS>>>
        ###########################################################
        cuaxpy(self.cublasContext, self._gpu_buf2.size, 1.0,
               self._gpu_buf2.gpudata, 1, self.X.gpudata, 1)
        self.cudaContext.synchronize()

        # [X done; X -> B1] <<<ASSUMED_PASS>>>
        ###########################################################
        cucopy(self.cublasContext, self.X.size,
               self.X.gpudata, 1, self._gpu_buf1.gpudata, 1)
        self.cudaContext.synchronize()

        # [B1 = zeta * B1 + gamma * P] <<<ASSUMED_PASS:max|ERR|<1e-7>>>
        ###########################################################
        cudnn.cudnnConvolutionForward(self.cudnnContext, GAMMA,
                                      self._desc_X, X__ptr__,
                                      self._desc_p, p__ptr__,
                                      self._desc_P, self._algo_p,
                                      None, 0, ZETA,
                                      self._desc_buf1, buf1__ptr__,
                                      self._cudnn_data_type)
        self.cudaContext.synchronize()
        if self.keepvars:
            self.Q_t[idx] = self._gpu_buf1.get()
            self.P_t[idx] = self.Q_t[idx] - ZETA * self.X.get()

        # [B1 = 1.0 * B1 + delta * Q] <<<ASSUMED_PASS>>>
        ###########################################################
        if self.parameters.omega:
            cudnn.cudnnConvolutionForward(self.cudnnContext, DELTA,
                                          self._desc_X, X__ptr__,
                                          self._desc_q, q__ptr__,
                                          self._desc_Q, self._algo_q,
                                          None, 0, 1.0,
                                          self._desc_buf1, buf1__ptr__,
                                          self._cudnn_data_type)
            self.cudaContext.synchronize()
            if self.keepvars:
                self.Q_t[idx] = self._gpu_buf1.get() - self.Q_t[idx]

        # [rectify(B1) -> B2] <<<PASS>>>
        ###########################################################
        cudnn.cudnnActivationForward(self.cudnnContext,
                                     self._cudnn_relu_act, 1.0,
                                     self._desc_buf1, buf1__ptr__, 0.0,
                                     self._desc_buf2, buf2__ptr__)
        self.cudaContext.synchronize()

        # [now B2 := Y_summand; B2 *= h/tau] <<<PASS>>>
        ###########################################################
        cuscal(self.cublasContext, self._gpu_buf2.size,
               self.stepsize * 1.0 / TAU, self._gpu_buf2.gpudata, 1)
        self.cudaContext.synchronize()

        # [Y *= (1-sigma**2 * h/tau)] <<<PASS>>>
        ###########################################################
        cuscal(self.cublasContext, self.Y.size,
               (1.0 - SIGMA**2 * self.stepsize * 1.0 / TAU),
               self.Y.gpudata, 1)
        self.cudaContext.synchronize()

        # [Y += B2 := h/tau * Y_summand; then Y is done] <<<PASS>>>
        ###########################################################
        cuaxpy(self.cublasContext, self._gpu_buf2.size, 1.0,
               self._gpu_buf2.gpudata, 1, self.Y.gpudata, 1)
        self.cudaContext.synchronize()

        if self.keeptime:
            self.X_t[idx + 1] = self.X.get()
            self.Y_t[idx + 1] = self.Y.get()
        if self.verbose:
            pbar.update(idx)
    if self.verbose:
        pbar.finish()

    if from_gpu:
        self.X = self.X.get()
        self.Y = self.Y.get()
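
###########################################################################
# Illustrative call pattern for the cuDNN/cuBLAS run() above (a sketch,
# not part of the module). A live CUDA context and a constructed circuit
# object with prepared kernels and tensors are assumed; only run(), X,
# and Y come from the code above. Judging from the update comments, X is
# integrated with h/eta and epsilon (the role of I in run_reference) and
# Y with h/tau and sigma (the role of O).
###########################################################################
def _demo_run_gpu(circuit, in_array):
    """`circuit` is any object exposing the GPU run() defined above."""
    circuit.run(in_array, from_gpu=True)  # pulls results back to host
    # after from_gpu=True, circuit.X and circuit.Y are host NumPy arrays
    return circuit.X, circuit.Y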