Example #1
    def get_value(self, tau0):
        dt = self.delta
        ar, cr, a, b, c, d = self.term.coefficients

        # Format the lags correctly
        tau0 = tt.abs_(tau0)
        tau = tau0[..., None]

        # Precompute some factors
        dpt = dt + tau
        dmt = dt - tau

        # Real parts:
        # tau > Delta
        crd = cr * dt
        cosh = tt.cosh(crd)
        norm = 2 * ar / crd ** 2
        K_large = tt.sum(norm * (cosh - 1) * tt.exp(-cr * tau), axis=-1)

        # tau < Delta
        crdmt = cr * dmt
        K_small = K_large + tt.sum(norm * (crdmt - tt.sinh(crdmt)), axis=-1)

        # Complex part
        cd = c * dt
        dd = d * dt
        c2 = c ** 2
        d2 = d ** 2
        c2pd2 = c2 + d2
        C1 = a * (c2 - d2) + 2 * b * c * d
        C2 = b * (c2 - d2) - 2 * a * c * d
        norm = 1.0 / (dt * c2pd2) ** 2
        k0 = tt.exp(-c * tau)
        cdt = tt.cos(d * tau)
        sdt = tt.sin(d * tau)

        # For tau > Delta
        cos_term = 2 * (tt.cosh(cd) * tt.cos(dd) - 1)
        sin_term = 2 * (tt.sinh(cd) * tt.sin(dd))
        factor = k0 * norm
        K_large += tt.sum(
            (C1 * cos_term - C2 * sin_term) * factor * cdt, axis=-1
        )
        K_large += tt.sum(
            (C2 * cos_term + C1 * sin_term) * factor * sdt, axis=-1
        )

        # tau < Delta
        edmt = tt.exp(-c * dmt)
        edpt = tt.exp(-c * dpt)
        cos_term = (
            edmt * tt.cos(d * dmt) + edpt * tt.cos(d * dpt) - 2 * k0 * cdt
        )
        sin_term = (
            edmt * tt.sin(d * dmt) + edpt * tt.sin(d * dpt) - 2 * k0 * sdt
        )
        K_small += tt.sum(2 * (a * c + b * d) * c2pd2 * dmt * norm, axis=-1)
        K_small += tt.sum((C1 * cos_term + C2 * sin_term) * norm, axis=-1)

        return tt.switch(tt.le(tau0, dt), K_small, K_large)
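The tau0[..., None] reshape is what lets the per-term coefficients (ar, cr, ...) broadcast against the lag array before the trailing-axis sums. A minimal NumPy sketch of that broadcasting pattern (illustrative only, not part of the original term):

import numpy as np

tau0 = np.abs(np.linspace(-2.0, 2.0, 5))   # lags, shape (5,)
cr = np.array([0.3, 1.7])                  # per-term decay rates, shape (2,)

tau = tau0[..., None]                      # shape (5, 1)
terms = np.exp(-cr * tau)                  # broadcasts to shape (5, 2)
K = terms.sum(axis=-1)                     # one value per lag, shape (5,)
print(K.shape)                             # (5,)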
Example #2
    def get_coefficients(self):
        ar, cr, a, b, c, d = self.term.coefficients

        # Real components
        crd = cr * self.delta
        coeffs = [2 * ar * (tt.cosh(crd) - 1) / crd**2, cr]

        # Imaginary coefficients
        cd = c * self.delta
        dd = d * self.delta
        c2 = c**2
        d2 = d**2
        factor = 2.0 / (self.delta * (c2 + d2))**2
        cos_term = tt.cosh(cd) * tt.cos(dd) - 1
        sin_term = tt.sinh(cd) * tt.sin(dd)

        C1 = a * (c2 - d2) + 2 * b * c * d
        C2 = b * (c2 - d2) - 2 * a * c * d

        coeffs += [
            factor * (C1 * cos_term - C2 * sin_term),
            factor * (C2 * cos_term + C1 * sin_term),
            c,
            d,
        ]

        return coeffs
Example #3
def test_jax_multioutput():
    x = tt.vector("x")
    x.tag.test_value = np.r_[1.0, 2.0].astype(theano.config.floatX)
    y = tt.vector("y")
    y.tag.test_value = np.r_[3.0, 4.0].astype(theano.config.floatX)

    w = tt.cosh(x**2 + y / 3.0)
    v = tt.cosh(x / 3.0 + y**2)

    fgraph = theano.gof.FunctionGraph([x, y], [w, v])

    compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs])
Example #4
    def __create_axons(self, w_init_range=5.0):
        
        I = T.vector('I')        
        
        # Define the V and W state variables:
        self.V = theano.shared(np.zeros(self.num_neurons, dtype=theano.config.floatX), 'V')
        self.W = theano.shared(np.zeros(self.num_neurons, dtype=theano.config.floatX), 'W')
        
        #Neuron equations defined according to Morris-Lecar model DE
        # and approximated using Euler's method
        m_inf = 1/2.0*( 1+T.tanh( (self.V-self.a1) / self.a2 ) )
        w_inf = 1/2.0*( 1+T.tanh( (self.V-self.a3) / self.a4 ) )
        t_inf = 1/T.cosh( (self.V-self.a3) / (2.0*self.a4) )
        Wnew = self.phi*( (w_inf-self.W) / t_inf )
        Vnew =  (-self.gC * m_inf * (self.V-self.eC) -
                self.gK * self.W * (self.V-self.eK) -
                self.gL * (self.V-self.eL) + I)/self.C
                
        w_update = [(self.W, self.W + self.del_t*Wnew)]

        Vout = self.V + self.del_t*Vnew
        v_update = [(self.V, Vout)]
        
        updates = w_update + v_update
        
        return theano.function(inputs=[I], outputs=[Vout, self.W], updates=updates, allow_input_downcast=True)          
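The interesting mechanics here are the updates pairs: each call to the compiled function advances the shared V and W buffers by one Euler step, so a simulation is just repeated calls with the injected current. A minimal, self-contained sketch of that shared-state update pattern (toy dynamics, not the Morris-Lecar equations):

import numpy as np
import theano
import theano.tensor as T

I = T.vector("I")
V = theano.shared(np.zeros(3, dtype=theano.config.floatX), "V")
step = theano.function([I], V + 0.1 * I, updates=[(V, V + 0.1 * I)],
                       allow_input_downcast=True)

print(step([1.0, 2.0, 3.0]))   # first Euler step
print(step([1.0, 2.0, 3.0]))   # shared V has advanced, so the output differs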
Example #5
def test_jax_basic():
    x = tt.matrix("x")
    y = tt.matrix("y")

    # `ScalarOp`
    z = tt.cosh(x**2 + y / 3.0)

    # `[Inc]Subtensor`
    out = tt.set_subtensor(z[0], -10.0)
    out = tt.inc_subtensor(out[0, 1], 2.0)
    out = out[:5, :3]

    out_fg = theano.gof.FunctionGraph([x, y], [out])

    test_input_vals = [
        np.tile(np.arange(10), (10, 1)).astype(tt.config.floatX),
        np.tile(np.arange(10, 20), (10, 1)).astype(tt.config.floatX),
    ]
    (jax_res, ) = compare_jax_and_py(out_fg, test_input_vals)

    # Confirm that the `Subtensor` slice operations are correct
    assert jax_res.shape == (5, 3)

    # Confirm that the `IncSubtensor` operations are correct
    assert jax_res[0, 0] == -10.0
    assert jax_res[0, 1] == -8.0

    out = tt.clip(x, y, 5)
    out_fg = theano.gof.FunctionGraph([x, y], [out])
    (jax_res, ) = compare_jax_and_py(out_fg, test_input_vals)
Example #6
def test_reallocation():
    x = tensor.scalar('x')
    y = tensor.scalar('y')
    z = tensor.tanh(3 * x + y) + tensor.cosh(x + 5 * y)
    for l in ['vm_nogc', 'vm', 'vm_nogc', 'vm']:
        m = theano.compile.get_mode(theano.Mode(linker=l))
        m = m.excluding('fusion', 'inplace')

        f = theano.function([x, y], z, name="test_reduce_memory",
                            mode=m)
        output = f(1, 2)
        assert output
        storage_map = f.fn.storage_map

        def check_storage(storage_map):
            from theano.tensor.var import TensorConstant
            for i in storage_map.keys():
                if not isinstance(i, TensorConstant):
                    keys_copy = list(storage_map.keys())
                    keys_copy.remove(i)
                    for o in keys_copy:
                        if (storage_map[i][0] and
                                storage_map[i][0] is storage_map[o][0]):
                            return [True, storage_map[o][0]]
            return [False, None]

        assert check_storage(storage_map)[0]
        assert len(set([id(v) for v in
                        storage_map.values()])) < len(storage_map)
Example #7
    def get_celerite_matrices(self, x, diag):
        dt = self.delta
        ar, cr, a, b, c, d = self.term.coefficients

        # Real part
        cd = cr * dt
        delta_diag = 2 * tt.sum(ar * (cd - tt.sinh(cd)) / cd**2)

        # Complex part
        cd = c * dt
        dd = d * dt
        c2 = c**2
        d2 = d**2
        c2pd2 = c2 + d2
        C1 = a * (c2 - d2) + 2 * b * c * d
        C2 = b * (c2 - d2) - 2 * a * c * d
        norm = (dt * c2pd2)**2
        sinh = tt.sinh(cd)
        cosh = tt.cosh(cd)
        delta_diag += 2 * tt.sum(
            (C2 * cosh * tt.sin(dd) - C1 * sinh * tt.cos(dd) +
             (a * c + b * d) * dt * c2pd2) / norm)

        new_diag = diag + delta_diag

        # new_diag = diag
        return super(IntegratedTerm, self).get_celerite_matrices(x, new_diag)
Example #8
def test_reallocation():
    x = tensor.scalar("x")
    y = tensor.scalar("y")
    z = tensor.tanh(3 * x + y) + tensor.cosh(x + 5 * y)
    # The functionality is currently implemented for the non-lazy and non-C VM only.
    for linker in [
            vm.VM_Linker(allow_gc=False, lazy=False, use_cloop=False),
            vm.VM_Linker(allow_gc=True, lazy=False, use_cloop=False),
    ]:
        m = theano.compile.get_mode(theano.Mode(linker=linker))
        m = m.excluding("fusion", "inplace")

        f = theano.function([x, y], z, name="test_reduce_memory", mode=m)
        output = f(1, 2)
        assert output
        storage_map = f.fn.storage_map

        def check_storage(storage_map):
            from theano.tensor.var import TensorConstant

            for i in storage_map:
                if not isinstance(i, TensorConstant):
                    keys_copy = list(storage_map.keys())[:]
                    keys_copy.remove(i)
                    for o in keys_copy:
                        if storage_map[i][
                                0] and storage_map[i][0] is storage_map[o][0]:
                            return [True, storage_map[o][0]]
            return [False, None]

        assert check_storage(storage_map)[0]
        assert len(set(id(v) for v in storage_map.values())) < len(storage_map)
Example #9
def test_reallocation():
    x = tensor.scalar('x')
    y = tensor.scalar('y')
    z = tensor.tanh(3 * x + y) + tensor.cosh(x + 5 * y)
    # The functionality is currently implemented for the non-lazy and non-C VM only.
    for l in [vm.VM_Linker(allow_gc=False, lazy=False, use_cloop=False),
              vm.VM_Linker(allow_gc=True, lazy=False, use_cloop=False)]:
        m = theano.compile.get_mode(theano.Mode(linker=l))
        m = m.excluding('fusion', 'inplace')

        f = theano.function([x, y], z, name="test_reduce_memory",
                            mode=m)
        output = f(1, 2)
        assert output
        storage_map = f.fn.storage_map

        def check_storage(storage_map):
            from theano.tensor.var import TensorConstant
            for i in storage_map:
                if not isinstance(i, TensorConstant):
                    keys_copy = list(storage_map.keys())[:]
                    keys_copy.remove(i)
                    for o in keys_copy:
                        if (storage_map[i][0] and
                                storage_map[i][0] is storage_map[o][0]):
                            return [True, storage_map[o][0]]
            return [False, None]

        assert check_storage(storage_map)[0]
        assert len(set(id(v) for v in
                       storage_map.values())) < len(storage_map)
Example #10
    def value(self, tau0):
        dt = self.delta
        ar, cr, a, b, c, d = self.term.coefficients

        # Format the lags correctly
        tau0 = tt.abs_(tau0)
        tau = tt.reshape(tau0,
                         tt.concatenate([tau0.shape, [1]]),
                         ndim=tau0.ndim + 1)

        # Precompute some factors
        dpt = dt + tau
        dmt = dt - tau

        # Real parts:
        # tau > Delta
        crd = cr * dt
        norm = 1.0 / (crd)**2
        factor = (tt.exp(crd) + tt.exp(-crd) - 2) * norm
        K_large = tt.sum(ar * tt.exp(-cr * tau) * factor, axis=-1)

        # tau < Delta
        K_small = tt.sum(ar * (2 * cr * (dmt) + tt.exp(-cr * dmt) +
                               tt.exp(-cr * dpt) - 2 * tt.exp(-cr * tau)) * norm,
                         axis=-1)

        # Complex part
        cd = c * dt
        dd = d * dt
        c2 = c**2
        d2 = d**2
        c2pd2 = c2 + d2
        C1 = a * (c2 - d2) + 2 * b * c * d
        C2 = b * (c2 - d2) - 2 * a * c * d
        norm = 1.0 / (dt * c2pd2)**2
        k0 = tt.exp(-c * tau)
        cdt = tt.cos(d * tau)
        sdt = tt.sin(d * tau)

        # For tau > Delta
        cos_term = 2 * (tt.cosh(cd) * tt.cos(dd) - 1)
        sin_term = 2 * (tt.sinh(cd) * tt.sin(dd))
        factor = k0 * norm
        K_large += tt.sum((C1 * cos_term - C2 * sin_term) * factor * cdt,
                          axis=-1)
        K_large += tt.sum((C2 * cos_term + C1 * sin_term) * factor * sdt,
                          axis=-1)

        # tau < Delta
        edmt = tt.exp(-c * dmt)
        edpt = tt.exp(-c * dpt)
        cos_term = edmt * tt.cos(d * dmt) + edpt * tt.cos(
            d * dpt) - 2 * k0 * cdt
        sin_term = edmt * tt.sin(d * dmt) + edpt * tt.sin(
            d * dpt) - 2 * k0 * sdt
        K_small += tt.sum(2 * (a * c + b * d) * c2pd2 * dmt * norm, axis=-1)
        K_small += tt.sum((C1 * cos_term + C2 * sin_term) * norm, axis=-1)

        return tt.switch(tt.le(tau0, dt), K_small, K_large)
Example #11
 def forward(self, x):
     # x: Nxd
     pretanh = T.dot(x, self.wmked) + self.b  # N x d
     coshsqr = T.sqr(T.cosh(pretanh))  # N x d
     y = x + self.u * T.tanh(pretanh)
     logjaco = T.sum(T.log(T.abs_(1. + self.u / coshsqr * self.wdiag)),
                     axis=1)
     return y, logjaco  # N x d,  N
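The log-Jacobian term uses d tanh(s)/ds = 1/cosh(s)**2, so each diagonal entry of the Jacobian of y = x + u * tanh(w*x + b) is 1 + u*w / cosh(w*x + b)**2. A quick NumPy finite-difference check of that derivative (scalar toy values, not taken from the original flow):

import numpy as np

u, w, b, x = 0.7, 1.3, -0.2, 0.5
f = lambda t: t + u * np.tanh(w * t + b)

analytic = 1.0 + u * w / np.cosh(w * x + b) ** 2
numeric = (f(x + 1e-6) - f(x - 1e-6)) / 2e-6
print(np.isclose(analytic, numeric))       # True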
Example #12
    def changing_weight(self, v_sample):
        """
        function to compute the transition probability flipping spins for v_sample,
        which is Ts's=conj(psi(s',M))/conj(psi(s,M)),
        as for transverse field Ising model the flipping term in the Hamiltonian
        is hf=h/2(sp_i+sm_i), therefore flip each site contribute the same energy h/2,
        but the Ts's is different, but one can sum up Ts's for all s'
         :param v_sample: one sample of the visible layer
         :param Hamiltonian: Hamiltonian of the physical system we concern, 
         we mainly use Hamiltonian.h
         :pbc: periodic boundary condition, 1:periodic, 0: open

        """
        # As self.W_real has the size of nvisible*nhidden, and here v_sample is a
        # vector of nvisible, so ones needs to transpose self.W_real to make it broadcastable
        exponent=-2*v_sample*self.vbias+\
                 T.sum(
                     T.log(T.cosh(self.hbias-self.W_real*v_sample)),axis=1)-\
                 T.sum(
                     T.log(T.cosh(self.hbias+self.W_real*v_sample)),axis=1)
        return T.sum(T.exp(exponent))
Example #13
    def forward(self, x):  # x: B x N x d
        ys = list()
        logjacos = list()
        for id in range(self.batchsize):
            pretanh = T.dot(x[id], self.wmked[id]) + self.b[id]
            coshsqr = T.sqr(T.cosh(pretanh))
            y = x[id] + self.u[id] * T.tanh(pretanh)  # N x d
            logjaco = T.sum(T.log(
                T.abs_(1. + self.u[id] / coshsqr * self.wdiag[id])),
                            axis=1)

            ys.append(y)
            logjacos.append(logjaco)
        outy = T.concatenate(ys).reshape(
            (self.batchsize, self.splsize, self.dim))  # B x N x d
        outlogjaco = T.concatenate(logjacos).reshape(
            (self.batchsize, self.splsize))  # B x N
        return outy, outlogjaco
Example #14
 def axon(V, I, num_neurons, del_t, w_init_range=5.0):
     
     #Define W terms:
     W = theano.shared((np.random.randn(num_neurons)-0.5)*w_init_range, 'W')
     #Neuron equations defined according to Morris-Lecar model DE
     # and approximated using Euler's method
     m_inf = 1/2.0*( 1+T.tanh( (V-a1) / a2 ) )
     w_inf = 1/2.0*( 1+T.tanh( (V-a3) / a4 ) )
     t_inf = 1/T.cosh( (V-a3) / (2.0*a4) )
     Wnew = phi*( (w_inf-W) / t_inf )
     Vnew =  (-gC * m_inf * (V-eC) -
             gK * W * (V-eK) -
             gL * (V-eL) + I)/C
             
     updates = [(W, W + del_t*Wnew)]
     Vout = V + del_t*Vnew
     
     return theano.function(inputs=[V, I], outputs=[Vout, W], updates=updates, allow_input_downcast=True)
Example #15
def smooth_huber_loss(y, pred, w):
    """Regression loss function, smooth version of Huber loss function. """
    return T.mean(w * T.log(T.cosh(y - pred)))
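log(cosh(r)) grows like r**2 / 2 for small residuals and like |r| - log(2) for large ones, which is why it serves as a smooth Huber-style loss. A small NumPy illustration (not part of the original loss module):

import numpy as np

r_small, r_large = 0.01, 10.0
print(np.log(np.cosh(r_small)), r_small ** 2 / 2)          # both ~5e-05
print(np.log(np.cosh(r_large)), r_large - np.log(2))       # both ~9.3069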
Example #16
def coshApx(x, offset=1.5):
    out = T.switch(compAbs(x, offset), .5 * T.exp(T.abs_(x)), T.cosh(x))
    return out
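compAbs is not shown in these snippets; it presumably returns a mask for |x| > offset so that the exp-based branch stands in for cosh once exp(-|x|) is negligible. A hedged NumPy sketch of the same switch (the helper name comp_abs is made up here):

import numpy as np

def comp_abs(x, offset):
    # hypothetical stand-in for compAbs: True where |x| exceeds the offset
    return np.abs(x) > offset

def cosh_apx(x, offset=1.5):
    # cosh(x) ~= exp(|x|) / 2 once exp(-|x|) is negligible
    return np.where(comp_abs(x, offset), 0.5 * np.exp(np.abs(x)), np.cosh(x))

print(cosh_apx(np.array([0.5, 3.0])))   # [1.1276..., 10.0428...]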
Example #17
 def logDetJacobian(self):
     grads = 1./( T.sqr(T.cosh(self.x))+ZERO )
     return T.mean( T.sum( T.log( T.abs_(grads)+ZERO) ,axis=1 ) )
Example #18
 def logDetJacobian(self):
     dtrans = 1./(T.cosh(self.active)**2)
     # dtrans = T.grad(T.sum(self.active),wrt=self.x)
     return T.mean(T.log(T.abs_(1.+T.dot(self.u,self.w)*dtrans )))
Example #19
def logcosh(inpt):
    return T.log(T.cosh(inpt))
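A direct T.log(T.cosh(x)) overflows once exp(|x|) exceeds the floating-point range (around |x| ~ 710 in float64, ~ 89 in float32) even though log(cosh(x)) itself stays finite. A numerically stable equivalent uses log(cosh(x)) = |x| + log1p(exp(-2|x|)) - log(2); a hedged NumPy sketch, not taken from the original project:

import numpy as np

def logcosh_stable(x):
    x = np.asarray(x, dtype=float)
    return np.abs(x) + np.log1p(np.exp(-2.0 * np.abs(x))) - np.log(2.0)

x = np.array([0.3, 50.0, 800.0])
print(logcosh_stable(x))       # finite everywhere
print(np.log(np.cosh(x)))      # cosh(800.) overflows, so the last entry is inf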
Example #20
def coshsqrApx(x, offset=1.5):
    out = T.switch(compAbs(x, offset), .25 * T.exp(2 * T.abs_(x)),
                   T.sqr(T.cosh(x)))
    return out
Example #21
def test_jax_basic():
    x = tt.matrix("x")
    y = tt.matrix("y")
    b = tt.vector("b")

    # `ScalarOp`
    z = tt.cosh(x**2 + y / 3.0)

    # `[Inc]Subtensor`
    out = tt.set_subtensor(z[0], -10.0)
    out = tt.inc_subtensor(out[0, 1], 2.0)
    out = out[:5, :3]

    out_fg = theano.gof.FunctionGraph([x, y], [out])

    test_input_vals = [
        np.tile(np.arange(10), (10, 1)).astype(theano.config.floatX),
        np.tile(np.arange(10, 20), (10, 1)).astype(theano.config.floatX),
    ]
    (jax_res, ) = compare_jax_and_py(out_fg, test_input_vals)

    # Confirm that the `Subtensor` slice operations are correct
    assert jax_res.shape == (5, 3)

    # Confirm that the `IncSubtensor` operations are correct
    assert jax_res[0, 0] == -10.0
    assert jax_res[0, 1] == -8.0

    out = tt.clip(x, y, 5)
    out_fg = theano.gof.FunctionGraph([x, y], [out])
    compare_jax_and_py(out_fg, test_input_vals)

    out = tt.diagonal(x, 0)
    out_fg = theano.gof.FunctionGraph([x], [out])
    compare_jax_and_py(
        out_fg,
        [np.arange(10 * 10).reshape((10, 10)).astype(theano.config.floatX)])

    out = tt.slinalg.cholesky(x)
    out_fg = theano.gof.FunctionGraph([x], [out])
    compare_jax_and_py(
        out_fg,
        [(np.eye(10) + np.random.randn(10, 10) * 0.01).astype(
            theano.config.floatX)],
    )

    # not sure why this isn't working yet with lower=False
    out = tt.slinalg.Cholesky(lower=False)(x)
    out_fg = theano.gof.FunctionGraph([x], [out])
    compare_jax_and_py(
        out_fg,
        [(np.eye(10) + np.random.randn(10, 10) * 0.01).astype(
            theano.config.floatX)],
    )

    out = tt.slinalg.solve(x, b)
    out_fg = theano.gof.FunctionGraph([x, b], [out])
    compare_jax_and_py(
        out_fg,
        [
            np.eye(10).astype(theano.config.floatX),
            np.arange(10).astype(theano.config.floatX),
        ],
    )

    out = tt.nlinalg.alloc_diag(b)
    out_fg = theano.gof.FunctionGraph([b], [out])
    compare_jax_and_py(out_fg, [np.arange(10).astype(theano.config.floatX)])

    out = tt.nlinalg.det(x)
    out_fg = theano.gof.FunctionGraph([x], [out])
    compare_jax_and_py(
        out_fg,
        [np.arange(10 * 10).reshape((10, 10)).astype(theano.config.floatX)])

    out = tt.nlinalg.matrix_inverse(x)
    out_fg = theano.gof.FunctionGraph([x], [out])
    compare_jax_and_py(
        out_fg,
        [(np.eye(10) + np.random.randn(10, 10) * 0.01).astype(
            theano.config.floatX)],
    )
Example #22
def logcosh(inpt):
    return T.log(T.cosh(inpt))
Example #23
 def logdet_dinv(self, y):
     return tt.sum(tt.log(tt.cosh(self.shift + self.scale * tt.arcsinh(y)))
                   ) + y.shape[0].astype(th.config.floatX) * tt.log(
                       self.scale) - 0.5 * tt.sum(tt.log1p(y**2))
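The three terms match the elementwise derivative of an inverse sinh-arcsinh map, d/dy sinh(shift + scale*arcsinh(y)) = scale * cosh(shift + scale*arcsinh(y)) / sqrt(1 + y**2), whose log summed over the elements of y is exactly this expression. A NumPy finite-difference check of that reading (shift and scale values are arbitrary):

import numpy as np

shift, scale = 0.4, 1.8
dinv = lambda y: np.sinh(shift + scale * np.arcsinh(y))

y = np.array([-1.3, 0.2, 2.5])
analytic = (np.sum(np.log(np.cosh(shift + scale * np.arcsinh(y))))
            + y.shape[0] * np.log(scale)
            - 0.5 * np.sum(np.log1p(y ** 2)))
numeric = np.sum(np.log(np.abs((dinv(y + 1e-6) - dinv(y - 1e-6)) / 2e-6)))
print(np.isclose(analytic, numeric))   # True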
Example #24
    def __init__(self, W0, X, penalty=0.01):

        assert W0.shape[1] == X.shape[1], 'Dimensions do not match. W: (k,n), X: (m,n)'

        self.penalty = penalty
        self.W_init = W0.copy()
        print('Compiling ...')

        # build objective and gradient
        W = T.dmatrix('W')
        x = T.dvector('x')
        u = W.dot(x)
        obj = 0.5 * T.sum((W.T.dot(u) - x).ravel() ** 2) + self.penalty * T.sum(T.log(T.cosh(u)))

        self.f = theano.function([W, x], obj)
        self.df = theano.function([W, x], T.grad(obj, W))

        # initialize optimizer
        self.X = X
        self.data = [x for x in X]
        self.optimizer = SFO(self.f_df, W0, self.data)

        print('Done.')
Example #25
def softabs(x):
  """"""
  
  return T.log(np.float32(2)*T.cosh(x)) - T.log(np.float32(2))
Example #26
def coshsqrinvApx(x, offset=1.5):
    out = T.switch(compAbs(x, offset), 4. * T.exp(-2. * T.abs_(x)),
                   T.sqr(T.cosh(x)**-1))
    return out
Example #27
    def __init__(self, in_dim, h_dim, out_dim, alpha, momentum):

        self.alpha = alpha
        self.momentum = momentum

        self.dims = [in_dim, h_dim, out_dim]

        _in = np.zeros((1, in_dim), dtype=np.float32)
        _h = np.zeros((1, h_dim), dtype=np.float32)
        _out = np.zeros((1, out_dim), dtype=np.float32)

        self._in = theano.shared(name='_in', value=_in.astype(theano.config.floatX), borrow=True)
        self._h = theano.shared(name='_h', value=_h.astype(theano.config.floatX), borrow=True)
        self._out = theano.shared(name='_out', value=_out.astype(theano.config.floatX), borrow=True)

        # in_h_Ws = uniform_f(in_dim, h_dim)
        # h_out_Ws = uniform_f(h_dim, out_dim)
        in_h_Ws = np.random.normal(0.5, np.sqrt(0.25), (in_dim, h_dim)).astype(np.float32)
        h_out_Ws = np.random.normal(0.5, np.sqrt(0.25), (h_dim, out_dim)).astype(np.float32)

        self.in_h_Ws = theano.shared(name='in_h_Ws', value=in_h_Ws.astype(theano.config.floatX), borrow=True)
        self.h_out_Ws = theano.shared(name='h_out_Ws', value=h_out_Ws.astype(theano.config.floatX), borrow=True)

        prev_dW1 = np.zeros_like(in_h_Ws, dtype=np.float32)
        prev_dW2 = np.zeros_like(h_out_Ws, dtype=np.float32)
        self.prev_delta_W_in_h = theano.shared(name='prev_delta_W_in_h', value=prev_dW1.astype(theano.config.floatX),
                                               borrow=True)
        self.prev_delta_W_h_out = theano.shared(name='prev_delta_W_h_out', value=prev_dW2.astype(theano.config.floatX),
                                                borrow=True)

        new_input = T.fmatrix()
        input_hidden_Ws = T.fmatrix()
        hidden_output_Ws = T.fmatrix()
        sum_h = new_input.dot(input_hidden_Ws)
        next_h = T.tanh(sum_h)
        sum_out = next_h.dot(hidden_output_Ws)
        next_out = T.tanh(sum_out)

        self.feed_forward = theano.function([new_input, input_hidden_Ws, hidden_output_Ws],
                                            updates=[(self._in, new_input), (self._h, next_h), (self._out, next_out)])

        self.set_output = theano.function([new_input], updates=[(self._out, new_input)])

        Ws_h_out = T.fmatrix()
        Ws_in_h = T.fmatrix()
        prev_delta_W_in_h = T.fmatrix()
        prev_delta_W_h_out = T.fmatrix()
        o_in = T.fmatrix()
        o_h = T.fmatrix()
        o_out = T.fmatrix()
        target_out = T.fmatrix()

        # L2 norm
        tmp = o_out-target_out
        error = tmp

        tmp_grad_h_out = T.ones_like(o_out) / T.cosh(o_out)
        diracs_out = error * tmp_grad_h_out * tmp_grad_h_out
        delta_W_h_out = - self.alpha * o_h.T.dot(diracs_out) + self.momentum * prev_delta_W_h_out
        new_Ws_h_out = Ws_h_out + delta_W_h_out

        tmp_grad_in_h = T.ones_like(o_h) / T.cosh(o_h)
        diracs_h_layer_terms = tmp_grad_in_h * tmp_grad_in_h
        diracs_h_chain = diracs_out.dot(Ws_h_out.T)
        diracs_h = diracs_h_chain * diracs_h_layer_terms
        delta_W_in_h = - self.alpha * o_in.T.dot(diracs_h) + self.momentum * prev_delta_W_in_h
        new_Ws_in_h = Ws_in_h + delta_W_in_h

        self.back_propagate = theano.function([Ws_in_h, Ws_h_out, o_in, o_h, o_out, target_out,
                                               prev_delta_W_in_h, prev_delta_W_h_out],
                                              updates=[(self.h_out_Ws, new_Ws_h_out), (self.in_h_Ws, new_Ws_in_h),
                                                       (self.prev_delta_W_in_h, delta_W_in_h),
                                                       (self.prev_delta_W_h_out, delta_W_h_out)])

        # self.set_input = theano.function([new_input], updates=[(self._in, new_input)])
        new_weights = T.fmatrix('new_weights')
        self.update_in_h_weights = theano.function([new_weights], updates=[(self.in_h_Ws, new_weights)])
        self.update_h_out_weights = theano.function([new_weights], updates=[(self.h_out_Ws, new_weights)])
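The backward pass above hand-codes the tanh derivative as a 1/cosh(.)**2 factor together with a momentum update. For comparison, Theano can produce the same sech-squared factor symbolically with T.grad; a minimal sketch, independent of the class above:

import theano
import theano.tensor as T

s = T.fmatrix("s")
y = T.tanh(s)
g = T.grad(T.sum(y), s)            # elementwise d tanh(s)/ds = 1 / cosh(s)**2
tanh_grad = theano.function([s], g, allow_input_downcast=True)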
Example #28
def smooth_huber_loss(y, pred, w):
    """Regression loss function, smooth version of Huber loss function. """
    return T.mean(w * T.log(T.cosh(y - pred)))