    def run(self, in_array):
        """ Do numerical integration
        """
        assert in_array.ndim == 4
        if self.input_shape is None:
            self.input_shape = in_array.shape
            self._sanity_check()
            self._prepare_kernels()
            self._prepare_tensors()

        SSN = self.parameters.ssn
        SSF = self.parameters.ssf
        ETA = self.parameters.eta
        TAU = self.parameters.tau
        EPSILON = self.parameters.epsilon
        SIGMA = self.parameters.sigma
        DELTA = self.parameters.delta
        GAMMA = self.parameters.gamma
        ALPHA = self.parameters.alpha
        BETA = self.parameters.beta
        ZETA = self.parameters.zeta
        OMEGA = self.parameters.omega
        XI = self.parameters.xi
        self.strides = [1,1,1,1]
        tf_eta = tf.get_variable(
            name='h/eta', dtype=self.floatXtf,
            initializer=np.array(self.stepsize * 1.0 / ETA).astype(self.floatXnp))
        tf_eps_eta = tf.get_variable(
            name='eps_h/eta', dtype=self.floatXtf,
            initializer=np.array(
                1.0 - EPSILON**2 * self.stepsize * 1.0 / ETA).astype(self.floatXnp))
        tf_tau = tf.get_variable(
            name='h/tau', dtype=self.floatXtf,
            initializer=np.array(self.stepsize * 1.0 / TAU).astype(self.floatXnp))
        tf_sig_tau = tf.get_variable(
            name='sig_h/tau', dtype=self.floatXtf,
            initializer=np.array(
                1.0 - SIGMA**2 * self.stepsize * 1.0 / TAU).astype(self.floatXnp))

        # load copies of input into GPU
        self.X = tf.placeholder(name='input',dtype=self.floatXtf,shape=self.input_shape)

        if self.verbose: pbar = pb(self.maxiter,
            'Building graph on [GPU]')

        # Following the run_reference implementation: initialize O and I from the input
        self.O = tf.identity(self.X)
        self.I = tf.identity(self.X)

        for idx in range(self.maxiter):
            U = tf.nn.conv2d(self.O, self._gpu_u, self.strides, padding='SAME')
            T = tf.nn.conv2d(self.O, self._gpu_t, self.strides, padding='SAME')
            P = tf.nn.conv2d(self.I, self._gpu_p, self.strides, padding='SAME')
            Q = tf.nn.conv2d(self.I, self._gpu_q, self.strides, padding='SAME')

            I_summand = tf.nn.relu((XI * self.X)
                - ((ALPHA * self.I + self.parameters.mu) * U)
                - ((BETA * self.I + self.parameters.nu) * T))
            self.I = tf_eps_eta * self.I + tf_eta * I_summand

            O_summand = tf.nn.relu(ZETA * self.I
                + GAMMA * P
                + DELTA * Q)
            self.O = tf_sig_tau * self.O + tf_tau * O_summand
            if self.verbose: pbar.update(idx)
        self.out_O = self.O
        self.out_I = self.I
        if self.verbose: pbar.finish()
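
# Hedged usage sketch (not part of the original source): assuming the method
# above only *builds* a TF 1.x graph, the resulting tensors still need a
# Session to be evaluated.  `model`, `evaluate_graph`, and `in_array` are
# hypothetical names used for illustration.
import tensorflow as tf

def evaluate_graph(model, in_array):
    """Build the graph once, then fetch the integrated I and O tensors."""
    model.run(in_array)  # defines model.X (placeholder), model.out_I, model.out_O
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        return sess.run([model.out_I, model.out_O],
                        feed_dict={model.X: in_array})
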
def create_stims(extra_vars):
    extra_vars = Bunch(extra_vars)
    out_dir = re.split(r'\.', extra_vars.f4_stimuli_file)[0]
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    #################
    nc = len(extra_vars._DEFAULT_KW2015_SO_PARAMETERS['selected_channels'])

    # build stimuli
    ###############
    all_hues_dkls = sp.linspace(0.0,
                                2 * sp.pi,
                                extra_vars.n_train,
                                endpoint=False)
    test_hues_dklS = all_hues_dkls[::extra_vars.n_train //
                                   extra_vars.n_t_hues][:extra_vars.n_t_hues]
    surr_hues_dklS = test_hues_dklS[::extra_vars.n_t_hues //
                                    extra_vars.n_s_hues][:extra_vars.n_s_hues]
    #sp.linspace(0.0, 2*sp.pi, n_s_hues, endpoint=False)
    test_sat_dklS = 0.2
    surr_sat_dklS = 0.16
    isolum_el = 0.0  # elevation 0 keeps stimuli isoluminant with the background

    stims_all_lms = sp.zeros(
        (extra_vars.n_train, extra_vars.size, extra_vars.size, 3))
    stims_ind_lms = sp.zeros((extra_vars.n_t_hues, extra_vars.n_s_hues,
                              extra_vars.size, extra_vars.size, 3))

    pbar_counter = 0
    pbar = pb(extra_vars.n_train,
              'Building isoluminant stimuli [all]')
    for i, azt in enumerate(all_hues_dkls):
        dklS_ = sp.array([test_sat_dklS, azt, isolum_el])
        c_lms_ = dklC2lms(sph2cart(dklS_))
        for ch in range(3):
            stims_all_lms[i, ..., ch] = stim.get_center_surround(
                size=extra_vars.size,
                csize=extra_vars.csize,
                cval=c_lms_[ch],
                sval=gray_lms[ch])

        pbar_counter += 1
        pbar.update(pbar_counter)
    pbar.finish()

    pbar_counter = 0
    pbar = pb(extra_vars.n_t_hues * extra_vars.n_s_hues,
              'Building isoluminant stimuli [ind]')
    for i, azt in enumerate(test_hues_dklS):
        dklS_ = sp.array([test_sat_dklS, azt, isolum_el])
        c_lms_ = dklC2lms(sph2cart(dklS_))
        for j, azs in enumerate(surr_hues_dklS):
            dklS_ = sp.array([surr_sat_dklS, azs, isolum_el])
            s_lms_ = dklC2lms(sph2cart(dklS_))
            for ch in range(3):
                stims_ind_lms[i, j, ..., ch] = stim.get_center_surround(
                    size=extra_vars.size,
                    csize=extra_vars.csize,
                    cval=c_lms_[ch],
                    sval=s_lms_[ch])

            pbar_counter += 1
            pbar.update(pbar_counter)
    pbar.finish()

    # compute vanilla SO features for those stimuli
    ###############################################
    so_all = sp.zeros(
        (extra_vars.n_train, nc, extra_vars.size, extra_vars.size))
    so_ind = sp.zeros((extra_vars.n_t_hues, extra_vars.n_s_hues, nc,
                       extra_vars.size, extra_vars.size))

    pbar = pb(extra_vars.n_train, 'Computing SO features [all]')
    for idx in range(extra_vars.n_train):
        so_all[idx] = GET_SO(stims_all_lms[idx], extra_vars._DEFAULT_FLOATX_NP,
                             extra_vars._DEFAULT_KW2015_SO_PARAMETERS)
        pbar.update(idx)
    pbar.finish()

    pbar = pb(extra_vars.n_t_hues * extra_vars.n_s_hues,
              'Computing SO features [ind]')
    for idx in range(extra_vars.n_t_hues):
        for jdx in range(extra_vars.n_s_hues):
            so_ind[idx, jdx] = GET_SO(stims_ind_lms[idx, jdx],
                                      extra_vars._DEFAULT_FLOATX_NP,
                                      extra_vars._DEFAULT_KW2015_SO_PARAMETERS)
            pbar.update(jdx + idx * extra_vars.n_s_hues)
    pbar.finish()
    so_ind = so_ind.reshape(extra_vars.n_t_hues * extra_vars.n_s_hues, nc,
                            extra_vars.size, extra_vars.size)

    # Final ops: center-surround hue differences, converted to degrees
    cs_hue_diff = da2ha(test_hues_dklS.reshape(extra_vars.n_t_hues, 1) -
                        surr_hues_dklS.reshape(1, extra_vars.n_s_hues))
    cs_hue_diff *= 180 / sp.pi
    np.savez(extra_vars.f4_stimuli_file,
             so_all=so_all,
             so_ind=so_ind,
             stims_all_lms=stims_all_lms,
             cs_hue_diff=cs_hue_diff)
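
# Hedged sketch (not part of the original source): the arrays written above can
# be read back with numpy.  The keys mirror the np.savez call; `load_stims` and
# `npz_path` are illustrative names, and the actual path comes from
# extra_vars.f4_stimuli_file.
import numpy as np

def load_stims(npz_path):
    """Load the SO features and stimuli saved by create_stims."""
    data = np.load(npz_path)
    return (data['so_all'], data['so_ind'],
            data['stims_all_lms'], data['cs_hue_diff'])
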
Example #3
def run_reference(afferent,
                  p=_DEFAULT_PARAMETERS,
                  axis=-3,
                  maxiter=_DEFAULT_MAXITER,
                  h=_DEFAULT_STEPSIZE,
                  keeptime=_DEFAULT_KEEPTIME,
                  verbose=_DEFAULT_VERBOSE):
    """ Integrate with Forward Euler method with integration step size h
    """

    ######################################
    # re-arrange array into canonical form
    ######################################
    axis = axis % afferent.ndim
    O, initsz, nunits = to4(afferent, axis=axis)
    I, O_t, I_t = O.copy(), [], []
    if keeptime:
        O_t.append(O)
        I_t.append(I)

    ############
    # parameters
    ############
    p = _DEFAULT_PARAMETERS if p is None else p
    sigma, tau = p['sigma'], p['tau']
    epsilon, eta = p['epsilon'], p['eta']
    ssc, sss = p['ssc'], p['sss']
    gamma, alpha, mu = p['gamma'], p['alpha'], p['mu']
    delta, beta, nu = p['delta'], p['beta'], p['nu']
    xi, zeta, omega = p['xi'], p['zeta'], p['omega']

    ##############################################
    # make sure pool sizes, input sizes make sense
    ##############################################
    assert sss < afferent.shape[-2]
    assert sss < afferent.shape[-1]
    assert ssc < sss
    assert sss % 2 == 1
    assert ssc % 2 == 1

    tuned_pooling_method = 'mean'  # 'max'
    untuned_pooling_method = 'mean'  # 'max'

    #################################
    # tuned summation: center pooling
    #################################
    # Unlike the GPU implementation, the center is excluded here, and the
    # default parameters were pulled from the GPU implementation.
    zeta = 1.0
    pool_P = {
        'type': 'pool',
        'mode': tuned_pooling_method,
        'size': (1, 1, ssc, ssc),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': (1, 1),
        'subpool': {
            'type': None
        },
    }

    ####################################################
    # untuned suppression: reduction across feature axis
    ####################################################
    pool_U = {
        'type': 'pool',
        'mode': untuned_pooling_method,
        'size': (1, -1, 1, 1),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': None,
    }

    #####################################
    # tuned suppression: surround pooling
    #####################################
    pool_T = {
        'type': 'pool',
        'mode': tuned_pooling_method,
        'size': (1, 1, sss, sss),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': (ssc, ssc),
        'subpool': {
            'type': None
        },
    }

    ########################
    # untuned summation: cRF
    ########################
    V = sp.linspace(0.0, 1.0, nunits)
    W = stats.norm.pdf(V, loc=V[nunits // 2], scale=omega)
    W /= W.sum()
    pool_Q = {
        'type': 'conv',
        'fb': W,
        'padding': 'wrap',
        'im_dims': 'ndhw',
        'fb_dims': 'd',
        'corr': False
    }

    ###################
    # pooling functions
    ###################
    untuned_suppression = lambda arr: recursive_pool(
        arr, params=pool_U, keyname='subpool', verbose=False)
    tuned_suppression = lambda arr: recursive_pool(
        arr, params=pool_T, keyname='subpool', verbose=False)
    tuned_summation = lambda arr: recursive_pool(
        arr, params=pool_P, keyname='subpool', verbose=False)
    untuned_summation = lambda arr: recursive_pool(
        arr, params=pool_Q, keyname='subpool', verbose=False)

    relu = lambda x: hwrectify(x, '+')
    # relu = lambda x: softplus(x, 10.0)

    ###################################################
    # iterate lateral connections and store time frames
    ###################################################
    if verbose: pbar = pb(maxiter, 'Integrating [HOST]')
    for i in range(maxiter):
        U = untuned_suppression(O)
        T = tuned_suppression(O)
        P = tuned_summation(I)
        Q = untuned_summation(I)

        I_summand = relu(xi * afferent - (alpha * I + mu) * U -
                         (beta * I + nu) * T)
        I = (1. - epsilon**2 * h / eta) * I + h / eta * I_summand

        O_summand = relu(zeta * I + gamma * P + delta * Q)
        O = (1. - sigma**2 * h / tau) * O + h / tau * O_summand

        if keeptime:
            I_t.append(I)
            O_t.append(O)
        if verbose: pbar.update(i)
    if verbose: pbar.finish()

    ################
    # postprocessing
    ################
    out_I = from4(I_t if keeptime else I,
                  axis=axis,
                  keeptime=keeptime,
                  size=initsz)
    out_O = from4(O_t if keeptime else O,
                  axis=axis,
                  keeptime=keeptime,
                  size=initsz)
    afferent.shape = initsz

    return out_I, out_O
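
# Hedged note (not part of the original source): reading off the update rules
# in the loop above, each state variable follows a leaky ODE of the form
#     tau_x * dx/dt = -sigma_x**2 * x + drive(x),
# which Forward Euler with step h discretizes as
#     x <- (1 - sigma_x**2 * h / tau_x) * x + (h / tau_x) * drive(x).
# A generic sketch of that single step (names are illustrative):
def euler_step(x, drive, h, tau, leak):
    """One Forward Euler step of tau * dx/dt = -leak**2 * x + drive."""
    return (1. - leak**2 * h / tau) * x + (h / tau) * drive
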
Example #4
def run_all_interneurons(afferent,
                         axis=-3,
                         maxiter=50,
                         h=1.,
                         keeptime=True,
                         verbose=True):
    """ Integrate with Forward Euler method with integration step size h
    """

    ######################################
    # re-arrange array into canonical form
    ######################################
    axis = axis % afferent.ndim
    Pyr, initsz, nunits = to4(afferent, axis=axis)
    Pyr_t = []
    # base_shape = Pyr.shape
    # intr_shape = Pyr.shape
    # intr_shape[-3] = 1
    # Som, Som_t = sp.zeros(base_shape), []
    # Pvb, Pvb_t = sp.zeros(intr_shape), []
    # Vip, Vip_t = sp.zeros(intr_shape), []

    if keeptime:
        Pyr_t.append(Pyr)
        # Som_t.append(Som)
        # Pvb_t.append(Pvb)
        # Vip_t.append(Vip)

    ############
    # parameters
    ############

    ssc = 9
    sss = 29
    tau = 5.00
    sigma = 1.00
    omega = 0.15
    k_FF_Pyr = 2.00
    k_SE_Pyr = 1.90
    k_SI_Pyr = 2.00
    k_HE_Pyr = 1.00
    k_HI_Pyr = 3.00

    ##############################################
    # make sure pool sizes, input sizes make sense
    ##############################################
    assert sss < afferent.shape[-2]
    assert sss < afferent.shape[-1]
    assert ssc < sss
    assert sss % 2 == 1
    assert ssc % 2 == 1

    tuned_pooling_method = 'mean'  # 'max'
    untuned_pooling_method = 'mean'  # 'max'

    #################################
    # tuned summation: center pooling
    #################################
    pool_P = {
        'type': 'pool',
        'mode': tuned_pooling_method,
        'size': (1, 1, ssc, ssc),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': (1, 1),
        'subpool': {
            'type': None
        },
    }

    ####################################################
    # untuned suppression: reduction across feature axis
    ####################################################
    pool_U = {
        'type': 'pool',
        'mode': untuned_pooling_method,
        'size': (1, -1, 1, 1),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': None,
    }

    #####################################
    # tuned suppression: surround pooling
    #####################################
    pool_T = {
        'type': 'pool',
        'mode': tuned_pooling_method,
        'size': (1, 1, sss, sss),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': None,  #(ssc, ssc),
        'subpool': {
            'type': None
        },
    }

    ########################
    # untuned summation: cRF
    ########################
    V = sp.linspace(0.0, 1.0, nunits)
    W = stats.norm.pdf(V, loc=V[nunits // 2], scale=omega)
    W /= W.sum()
    pool_Q = {
        'type': 'conv',
        'fb': W,
        'padding': 'wrap',
        'im_dims': 'ndhw',
        'fb_dims': 'd',
        'corr': False
    }

    ###################
    # pooling functions
    ###################
    untuned_suppression = lambda arr: recursive_pool(
        arr, params=pool_U, keyname='subpool', verbose=False)
    tuned_suppression = lambda arr: recursive_pool(
        arr, params=pool_T, keyname='subpool', verbose=False)
    tuned_summation = lambda arr: recursive_pool(
        arr, params=pool_P, keyname='subpool', verbose=False)
    untuned_summation = lambda arr: recursive_pool(
        arr, params=pool_Q, keyname='subpool', verbose=False)
    relu = lambda x: hwrectify(x, '+')

    ###################################################
    # iterate lateral connections and store time frames
    ###################################################

    if verbose: pbar = pb(maxiter, 'Integrating [HOST]')
    for i in range(maxiter):
        U = untuned_suppression(Pyr)
        T = tuned_suppression(Pyr)
        P = tuned_summation(Pyr)
        Q = untuned_summation(Pyr)

        Pyr_dendritic = relu(k_FF_Pyr * afferent
                             + k_HE_Pyr * Q
                             + k_SE_Pyr * P
                             - k_SI_Pyr * T)
        Pyr_summand = relu(Pyr_dendritic - k_HI_Pyr * U)
        Pyr = (1. - sigma**2 * h / tau) * Pyr + h / tau * Pyr_summand

        if keeptime: Pyr_t.append(Pyr)
        if verbose: pbar.update(i)
    if verbose: pbar.finish()

    ################
    # postprocessing
    ################
    out_Pyr = from4(Pyr_t if keeptime else Pyr,
                    axis=axis,
                    keeptime=keeptime,
                    size=initsz)
    afferent.shape = initsz

    return out_Pyr
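
# Hedged usage sketch (not part of the original source): the asserts above
# require the spatial dimensions of `afferent` to exceed sss = 29, with the
# feature axis at axis=-3 by default.  The shape and dtype below are
# illustrative only.
import numpy as np

def demo_run_all_interneurons():
    afferent = np.random.rand(8, 64, 64)  # (features, height, width), 64 > 29
    return run_all_interneurons(afferent, maxiter=10,
                                keeptime=False, verbose=False)
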
Example #5
def run(afferent,
        p=_DEFAULT_PARAMETERS_new,
        axis=-3,
        maxiter=_DEFAULT_MAXITER_new,
        h=_DEFAULT_STEPSIZE_new,
        keeptime=_DEFAULT_KEEPTIME,
        verbose=_DEFAULT_VERBOSE):
    """ Integrate with Forward Euler method with integration step size h
    """

    ######################################
    # re-arrange array into canonical form
    ######################################
    axis = axis % afferent.ndim
    Pyr, initsz, nunits = to4(afferent, axis=axis)
    Int = Pyr.copy()
    Pyr_t = []
    Int_t = []

    if keeptime:
        Pyr_t.append(Pyr)
        Int_t.append(Int)

    ############
    # parameters
    ############
    p = _DEFAULT_PARAMETERS_new if p is None else p
    sigma, tau = p['sigma'], p['tau']
    epsilon, eta = p['epsilon'], p['eta']
    ssc, sss = p['ssc'], p['sss']
    gamma, alpha, mu = p['gamma'], p['alpha'], p['mu']
    delta, beta, nu = p['delta'], p['beta'], p['nu']
    xi, zeta, omega = p['xi'], p['zeta'], p['omega']
    phi, psi = p['phi'], p['psi']

    ##############################################
    # make sure pool sizes, input sizes make sense
    ##############################################
    assert sss < afferent.shape[-2]
    assert sss < afferent.shape[-1]
    assert ssc < sss
    assert sss % 2 == 1
    assert ssc % 2 == 1

    tuned_pooling_method = 'mean'  # 'max'
    untuned_pooling_method = 'mean'  # 'max'

    #################################
    # tuned summation: center pooling
    #################################
    pool_P = {
        'type': 'pool',
        'mode': tuned_pooling_method,
        'size': (1, 1, ssc, ssc),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': (1, 1),
        'subpool': {
            'type': None
        },
    }

    ####################################################
    # untuned suppression: reduction across feature axis
    ####################################################
    pool_U = {
        'type': 'pool',
        'mode': untuned_pooling_method,
        'size': (1, -1, 1, 1),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': None,
    }

    #####################################
    # tuned suppression: surround pooling
    #####################################
    pool_T = {
        'type': 'pool',
        'mode': tuned_pooling_method,
        'size': (1, 1, sss, sss),
        'padding': 'reflect',
        'stride_size': None,
        'keepdims': True,
        'exclude_center': (1, 1),  #(ssc, ssc),
        'subpool': {
            'type': None
        },
    }

    ########################
    # untuned summation: cRF
    ########################
    V = sp.linspace(0.0, 1.0, nunits)
    W = stats.norm.pdf(V, loc=V[nunits // 2], scale=omega)
    W /= W.sum()
    pool_Q = {
        'type': 'conv',
        'fb': W,
        'padding': 'wrap',
        'im_dims': 'ndhw',
        'fb_dims': 'd',
        'corr': False
    }

    ###################
    # pooling functions
    ###################
    untuned_suppression = lambda arr: recursive_pool(
        arr, params=pool_U, keyname='subpool', verbose=False)
    tuned_suppression = lambda arr: recursive_pool(
        arr, params=pool_T, keyname='subpool', verbose=False)
    tuned_summation = lambda arr: recursive_pool(
        arr, params=pool_P, keyname='subpool', verbose=False)
    untuned_summation = lambda arr: recursive_pool(
        arr, params=pool_Q, keyname='subpool', verbose=False)

    relu = lambda x: hwrectify(x, '+')
    # relu = lambda x: softplus(x, 10.0)

    ###################################################
    # iterate lateral connections and store time frames
    ###################################################
    if verbose: pbar = pb(maxiter, 'Integrating [HOST]')
    for i in range(maxiter):
        U = untuned_suppression(Pyr)
        T = tuned_suppression(Pyr)
        P = tuned_summation(Pyr)
        Q = untuned_summation(Pyr)
        Int_summand = relu(zeta * Pyr
                           + alpha * U
                           + beta * T
                           - psi ** 2)
        Int = (1. - epsilon**2 * h / eta) * Int + h / eta * Int_summand

        Pyr_summand = relu(xi * afferent  # + 0.25 * tuned_summation(afferent)
                           + gamma * P
                           + delta * Q
                           - (mu * Pyr + nu) * Int
                           - phi ** 2)
        Pyr = (1. - sigma**2 * h / tau) * Pyr + h / tau * Pyr_summand

        if keeptime:
            Int_t.append(Int)
            Pyr_t.append(Pyr)
        if verbose: pbar.update(i)
    if verbose: pbar.finish()

    ################
    # postprocessing
    ################
    out_Int = from4(Int_t if keeptime else Int,
                    axis=axis,
                    keeptime=keeptime,
                    size=initsz)
    out_Pyr = from4(Pyr_t if keeptime else Pyr,
                    axis=axis,
                    keeptime=keeptime,
                    size=initsz)
    afferent.shape = initsz

    return out_Int, out_Pyr
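
# Hedged note (assumption, since _DEFAULT_PARAMETERS_new is not shown here):
# this variant reads 'phi' and 'psi' in addition to the keys used by
# run_reference, so a custom parameter dict would need at least the keys below.
# Values are illustrative placeholders, not the original defaults (except
# ssc, sss, tau, sigma, omega, which echo run_all_interneurons).
_EXAMPLE_PARAMETERS = {
    'sigma': 1.0, 'tau': 5.0, 'epsilon': 1.0, 'eta': 5.0,
    'ssc': 9, 'sss': 29,
    'gamma': 1.0, 'alpha': 1.0, 'mu': 0.0,
    'delta': 1.0, 'beta': 1.0, 'nu': 0.0,
    'xi': 1.0, 'zeta': 1.0, 'omega': 0.15,
    'phi': 0.0, 'psi': 0.0,
}
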
Example #6
    def run(self, in_array, from_gpu=True):
        """ Do numerical integration
        """

        assert in_array.ndim == 4
        if self.input_shape is None:
            self.input_shape = in_array.shape
            self._sanity_check()
            self._prepare_kernels()
            self._prepare_tensors()

        SSN = self.parameters.ssn
        SSF = self.parameters.ssf
        ETA = self.parameters.eta
        TAU = self.parameters.tau
        EPSILON = self.parameters.epsilon
        SIGMA = self.parameters.sigma
        DELTA = self.parameters.delta
        GAMMA = self.parameters.gamma
        ALPHA = self.parameters.alpha
        BETA = self.parameters.beta
        ZETA = self.parameters.zeta
        OMEGA = self.parameters.omega
        XI = self.parameters.xi

        # load copies of input into GPU
        self._gpu_input = _array2gpu(in_array)

        cucopy(self.cublasContext, self._gpu_input.size,
               self._gpu_input.gpudata, 1, self.X.gpudata, 1)
        self.cudaContext.synchronize()

        cucopy(self.cublasContext, self._gpu_input.size,
               self._gpu_input.gpudata, 1, self.Y.gpudata, 1)
        self.cudaContext.synchronize()

        if self.keeptime:
            try:
                self.X_t[0] = in_array.get()
                self.Y_t[0] = in_array.get()
            except AttributeError:
                self.X_t[0] = in_array
                self.Y_t[0] = in_array

        # create a bunch of pointers for cuDNN
        X__ptr__ = _gpuarray2ptr(self.X)
        Y__ptr__ = _gpuarray2ptr(self.Y)
        u__ptr__ = _gpuarray2ptr(self._gpu_u)
        p__ptr__ = _gpuarray2ptr(self._gpu_p)
        t__ptr__ = _gpuarray2ptr(self._gpu_t)
        buf1__ptr__ = _gpuarray2ptr(self._gpu_buf1)
        buf2__ptr__ = _gpuarray2ptr(self._gpu_buf2)
        buf3__ptr__ = _gpuarray2ptr(self._gpu_buf3)
        buf4__ptr__ = _gpuarray2ptr(self._gpu_buf4)
        if self.parameters.omega:
            q__ptr__ = _gpuarray2ptr(self._gpu_q)

        if self.verbose:
            pbar = pb(self.maxiter, 'Integrating [GPU:%i]' % (self.i_gpu, ))
        for idx in range(self.maxiter):

            # [-(alpha*X+mu) -> B2] <<<PASS>>>
            ###########################################################
            cucopy(self.cublasContext, self.X.size, self.X.gpudata, 1,
                   self._gpu_buf2.gpudata, 1)
            self.cudaContext.synchronize()

            cuscal(self.cublasContext, self._gpu_buf2.size, -ALPHA,
                   self._gpu_buf2.gpudata, 1)
            self.cudaContext.synchronize()

            self._bcastbias_cuda(self._gpu_buf2,
                                 self._gpu_negMU,
                                 block=self._bl,
                                 grid=self._gr)
            self.cudaContext.synchronize()

            # [compute(U); U -> B4] <<<PASS:max|ERR|<1e-7>>>
            ###########################################################
            cudnn.cudnnConvolutionForward(self.cudnnContext, 1.0, self._desc_Y,
                                          Y__ptr__, self._desc_u, u__ptr__,
                                          self._desc_U, self._algo_u, None, 0,
                                          0.0, self._desc_buf4, buf4__ptr__,
                                          self._cudnn_data_type)
            self.cudaContext.synchronize()

            # [B2 *= B4 := U] <<<PASS>>>
            ###########################################################
            self._bcastmul_cuda(self._gpu_buf2,
                                self._gpu_buf4,
                                block=self._bl,
                                grid=self._gr)
            self.cudaContext.synchronize()

            if self.keepvars:
                self.U_t[idx] = -1.0 * self._gpu_buf2.get()

            # [XI * L -> B1] <<<PASS>>>
            ###########################################################
            cucopy(self.cublasContext, self._gpu_input.size,
                   self._gpu_input.gpudata, 1, self._gpu_buf1.gpudata, 1)
            self.cudaContext.synchronize()

            cuscal(self.cublasContext, self._gpu_buf1.size, XI,
                   self._gpu_buf1.gpudata, 1)
            self.cudaContext.synchronize()

            ###########################################################
            ###########################################################
            # import warnings
            # warnings.warn('Shunting inhibition introduced ' + \
            #     'as an experimental feature!!!')
            # cucopy(self.cublasContext,
            #     self.X.size,self.X.gpudata, 1, self._gpu_buf3.gpudata, 1)
            # self.cudaContext.synchronize()
            # self._pwisemul_cuda(self._gpu_buf3,
            #     self._gpu_input, block=self._bl, grid=self._gr)
            # self.cudaContext.synchronize()
            # cuscal(self.cublasContext,
            #     self._gpu_buf3.size, -XI*0.5, self._gpu_buf3.gpudata, 1)
            # self.cudaContext.synchronize()
            # cuaxpy(self.cublasContext, self._gpu_buf3.size, 1.0,
            #     self._gpu_buf3.gpudata, 1, self._gpu_buf1.gpudata, 1)
            # self.cudaContext.synchronize()
            ###########################################################
            ###########################################################

            # [B1 += B2 := -(alpha*X+mu).*U] <<<PASS>>>
            ###########################################################
            cuaxpy(self.cublasContext, self._gpu_buf2.size, 1.0,
                   self._gpu_buf2.gpudata, 1, self._gpu_buf1.gpudata, 1)
            self.cudaContext.synchronize()

            # [-(beta*X+nu) -> B2] <<<PASS>>>
            ###########################################################
            cucopy(self.cublasContext, self.X.size, self.X.gpudata, 1,
                   self._gpu_buf2.gpudata, 1)
            self.cudaContext.synchronize()

            cuscal(self.cublasContext, self._gpu_buf2.size, -BETA,
                   self._gpu_buf2.gpudata, 1)
            self.cudaContext.synchronize()

            self._bcastbias_cuda(self._gpu_buf2,
                                 self._gpu_negNU,
                                 block=self._bl,
                                 grid=self._gr)
            self.cudaContext.synchronize()

            # [T<excluding_center> -> B3] <<<PASS:max|ERR|<1e-3,avg=1e-5>>>
            ###########################################################
            cudnn.cudnnConvolutionForward(self.cudnnContext, 1.0, self._desc_Y,
                                          Y__ptr__, self._desc_t, t__ptr__,
                                          self._desc_T, self._algo_t, None, 0,
                                          0.0, self._desc_buf3, buf3__ptr__,
                                          self._cudnn_data_type)
            self.cudaContext.synchronize()

            # [B2 *= B3 := T] <<<PASS>>>
            ###########################################################
            self._pwisemul_cuda(self._gpu_buf2,
                                self._gpu_buf3,
                                block=self._bl,
                                grid=self._gr)
            self.cudaContext.synchronize()

            if self.keepvars:
                self.T_t[idx] = -1.0 * self._gpu_buf2.get()

            # [B1 += B2 := -(beta*X+nu).*T] <<<PASS>>>
            ###########################################################
            cuaxpy(self.cublasContext, self._gpu_buf2.size, 1.0,
                   self._gpu_buf2.gpudata, 1, self._gpu_buf1.gpudata, 1)
            self.cudaContext.synchronize()

            # [now B1 := X_summand; rectify(B1) -> B2] <<<PASS>>>
            ###########################################################
            cudnn.cudnnActivationForward(self.cudnnContext,
                                         self._cudnn_relu_act, 1.0,
                                         self._desc_buf1, buf1__ptr__, 0.0,
                                         self._desc_buf2, buf2__ptr__)
            self.cudaContext.synchronize()

            # [B2 *= h/eta] <<<PASS>>>
            ###########################################################
            cuscal(self.cublasContext, self._gpu_buf2.size,
                   self.stepsize * 1.0 / ETA, self._gpu_buf2.gpudata, 1)
            self.cudaContext.synchronize()

            # [X *= (1-epsilon**2 * h/eta)] <<<PASS>>>
            ###########################################################
            cuscal(self.cublasContext, self.X.size,
                   (1.0 - EPSILON**2 * self.stepsize * 1.0 / ETA),
                   self.X.gpudata, 1)
            self.cudaContext.synchronize()

            # [X += B2 := h/eta * X_summand] <<<PASS>>>
            ###########################################################
            cuaxpy(self.cublasContext, self._gpu_buf2.size, 1.0,
                   self._gpu_buf2.gpudata, 1, self.X.gpudata, 1)
            self.cudaContext.synchronize()

            # [X done; X -> B1] <<<ASSUMED_PASS>>>
            ###########################################################
            cucopy(self.cublasContext, self.X.size, self.X.gpudata, 1,
                   self._gpu_buf1.gpudata, 1)
            self.cudaContext.synchronize()

            # [B1 = zeta * B1 + gamma * P] <<<ASSUMED_PASS:max|ERR|<1e-7>>>
            ###########################################################
            cudnn.cudnnConvolutionForward(self.cudnnContext, GAMMA,
                                          self._desc_X, X__ptr__, self._desc_p,
                                          p__ptr__, self._desc_P, self._algo_p,
                                          None, 0, ZETA, self._desc_buf1,
                                          buf1__ptr__, self._cudnn_data_type)
            self.cudaContext.synchronize()

            if self.keepvars:
                self.Q_t[idx] = self._gpu_buf1.get()
                self.P_t[idx] = self.Q_t[idx] - ZETA * self.X.get()

            # [B1 = 1.0 * B1 + delta * Q] <<<ASSUMED_PASS>>>
            ###########################################################
            if self.parameters.omega:
                cudnn.cudnnConvolutionForward(
                    self.cudnnContext, DELTA, self._desc_X, X__ptr__,
                    self._desc_q, q__ptr__, self._desc_Q, self._algo_q, None,
                    0, 1.0, self._desc_buf1, buf1__ptr__,
                    self._cudnn_data_type)
                self.cudaContext.synchronize()

            if self.keepvars:
                self.Q_t[idx] = self._gpu_buf1.get() - self.Q_t[idx]

            # [rectify(B1) -> B2] <<<PASS>>>
            ###########################################################
            cudnn.cudnnActivationForward(self.cudnnContext,
                                         self._cudnn_relu_act, 1.0,
                                         self._desc_buf1, buf1__ptr__, 0.0,
                                         self._desc_buf2, buf2__ptr__)
            self.cudaContext.synchronize()

            # [now B2 := Y_summand; B2 *= h/tau] <<<PASS>>>
            ###########################################################
            cuscal(self.cublasContext, self._gpu_buf2.size,
                   self.stepsize * 1.0 / TAU, self._gpu_buf2.gpudata, 1)
            self.cudaContext.synchronize()

            # [Y *= (1-sigma**2 * h/tau)] <<<PASS>>>
            ###########################################################
            cuscal(self.cublasContext, self.Y.size,
                   (1.0 - SIGMA**2 * self.stepsize * 1.0 / TAU),
                   self.Y.gpudata, 1)
            self.cudaContext.synchronize()

            # [Y += B2 := h/tau * Y_summand; then Y is done] <<<PASS>>>
            ###########################################################
            cuaxpy(self.cublasContext, self._gpu_buf2.size, 1.0,
                   self._gpu_buf2.gpudata, 1, self.Y.gpudata, 1)
            self.cudaContext.synchronize()

            if self.keeptime:
                self.X_t[idx + 1] = self.X.get()
                self.Y_t[idx + 1] = self.Y.get()

            if self.verbose: pbar.update(idx)
        if self.verbose: pbar.finish()

        if from_gpu:
            self.X = self.X.get()
            self.Y = self.Y.get()
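
# Hedged host-side summary (not part of the original source): reading the
# buffer comments above, X plays the role of I and Y the role of O from
# run_reference, with one difference: the summation terms P and Q are computed
# from the freshly updated X within the same iteration.  `gpu_step_host`,
# `prm`, and the conv_* callables (stand-ins for the cuDNN convolutions with
# kernels u, t, p, q) are illustrative names.
import numpy as np

def gpu_step_host(X, Y, L, conv_u, conv_t, conv_p, conv_q, prm, h):
    """One host-side step mirroring the cuBLAS/cuDNN sequence above."""
    relu = lambda a: np.maximum(a, 0.0)
    X_summand = relu(prm.xi * L
                     - (prm.alpha * X + prm.mu) * conv_u(Y)
                     - (prm.beta * X + prm.nu) * conv_t(Y))
    X = (1. - prm.epsilon**2 * h / prm.eta) * X + h / prm.eta * X_summand
    # unlike run_reference, the summation terms use the updated X
    Y_summand = relu(prm.zeta * X + prm.gamma * conv_p(X) + prm.delta * conv_q(X))
    Y = (1. - prm.sigma**2 * h / prm.tau) * Y + h / prm.tau * Y_summand
    return X, Y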