# Shared setup for the example snippets below (the original listing omits
# imports). Some snippets use the long aliases `numpy`/`tensor`, others the
# short `np`/`T`. A numerically stable `logsumexp` over Theano tensors (and,
# for `inner_fn`, a `Gaussian` log-density helper) is assumed to be provided
# by the surrounding projects.
import numpy
import numpy as np
import theano
import theano.tensor as T
from theano import tensor


def GMMdisag2(y, mu, sig, coeff, mu2, sig2, coeff2):
    """
    Gaussian mixture model negative log-likelihood

    Parameters
    ----------
    y     : TensorVariable
    mu    : FullyConnected (Linear)
    sig   : FullyConnected (Softplus)
    coeff : FullyConnected (Softmax)
    """
    y1 = y[:,0].dimshuffle(0, 'x').dimshuffle(0, 1, 'x')#[:,0,:]
    y2 = y[:,1].dimshuffle(0, 'x').dimshuffle(0, 1, 'x')#[:,1,:]
    y1.name = 'y1_shuffled'
    y2.name = 'y2_shuffled'
    mu = mu.reshape((mu.shape[0],mu.shape[1]//coeff.shape[-1],coeff.shape[-1]))
    sig = sig.reshape((sig.shape[0],sig.shape[1]//coeff.shape[-1],coeff.shape[-1]))

    mu2 = mu2.reshape((mu2.shape[0],mu2.shape[1]//coeff2.shape[-1],coeff2.shape[-1]))
    sig2 = sig2.reshape((sig2.shape[0],sig2.shape[1]//coeff2.shape[-1],coeff2.shape[-1]))

    inner1 = -0.5 * T.sum(T.sqr(y1 - mu) / sig**2 + 2 * T.log(sig) + T.log(2 * np.pi), axis=1)
    inner1.name = 'inner'
    nll1 = -logsumexp(T.log(coeff) + inner1, axis=1)
    nll1.name = 'logsum'

    inner2 = -0.5 * T.sum(T.sqr(y2 - mu2) / sig2**2 + 2 * T.log(sig2) + T.log(2 * np.pi), axis=1)
    nll2 = -logsumexp(T.log(coeff2) + inner2, axis=1)
    nll = nll1 + nll2
    return nll
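
# A minimal usage sketch, added here for illustration; it assumes Theano is
# available and that each target gets k mixture components, so `mu`, `sig`,
# and `coeff` are all (batch, k) matrices with `coeff` rows summing to 1.

y_sym = T.matrix('y')                    # (batch, 2): two targets
mu_s, sig_s, coeff_s = T.matrix('mu'), T.matrix('sig'), T.matrix('coeff')
mu2_s, sig2_s, coeff2_s = T.matrix('mu2'), T.matrix('sig2'), T.matrix('coeff2')

nll_sym = GMMdisag2(y_sym, mu_s, sig_s, coeff_s, mu2_s, sig2_s, coeff2_s)
f = theano.function([y_sym, mu_s, sig_s, coeff_s, mu2_s, sig2_s, coeff2_s],
                    nll_sym)             # returns one NLL per batch row
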
def GMMdisag5(y, mu, sig, coeff, mu2, sig2, coeff2, mu3, sig3, coeff3, mu4, sig4, coeff4, mu5, sig5, coeff5):
    """
    Gaussian mixture model negative log-likelihood

    Parameters
    ----------
    y     : TensorVariable
    mu    : FullyConnected (Linear)
    sig   : FullyConnected (Softplus)
    coeff : FullyConnected (Softmax)
    """
    y1 = y[:,0].dimshuffle(0, 'x').dimshuffle(0, 1, 'x')#[:,0,:]
    y2 = y[:,1].dimshuffle(0, 'x').dimshuffle(0, 1, 'x')#[:,1,:]
    y3 = y[:,2].dimshuffle(0, 'x').dimshuffle(0, 1, 'x')
    y4 = y[:,3].dimshuffle(0, 'x').dimshuffle(0, 1, 'x')
    y5 = y[:,4].dimshuffle(0, 'x').dimshuffle(0, 1, 'x')

    y1.name = 'y1_shuffled'
    y2.name = 'y2_shuffled'
    y3.name = 'y3_shuffled'
    y4.name = 'y4_shuffled'
    y5.name = 'y5_shuffled'
    mu = mu.reshape((mu.shape[0],mu.shape[1]//coeff.shape[-1],coeff.shape[-1]))
    sig = sig.reshape((sig.shape[0],sig.shape[1]//coeff.shape[-1],coeff.shape[-1]))

    mu2 = mu2.reshape((mu2.shape[0],mu2.shape[1]//coeff2.shape[-1],coeff2.shape[-1]))
    sig2 = sig2.reshape((sig2.shape[0],sig2.shape[1]//coeff2.shape[-1],coeff2.shape[-1]))

    mu3 = mu3.reshape((mu3.shape[0],mu3.shape[1]//coeff3.shape[-1],coeff3.shape[-1]))
    sig3 = sig3.reshape((sig3.shape[0],sig3.shape[1]//coeff3.shape[-1],coeff3.shape[-1]))

    mu4 = mu4.reshape((mu4.shape[0],mu4.shape[1]//coeff4.shape[-1],coeff4.shape[-1]))
    sig4 = sig4.reshape((sig4.shape[0],sig4.shape[1]//coeff4.shape[-1],coeff4.shape[-1]))

    mu5 = mu5.reshape((mu5.shape[0],mu5.shape[1]//coeff5.shape[-1],coeff5.shape[-1]))
    sig5 = sig5.reshape((sig5.shape[0],sig5.shape[1]//coeff5.shape[-1],coeff5.shape[-1]))

    inner1 = -0.5 * T.sum(T.sqr(y1 - mu) / sig**2 + 2 * T.log(sig) + T.log(2 * np.pi), axis=1)
    inner1.name = 'inner'
    nll1 = -logsumexp(T.log(coeff) + inner1, axis=1)
    nll1.name = 'logsum'

    inner2 = -0.5 * T.sum(T.sqr(y2 - mu2) / sig2**2 + 2 * T.log(sig2) + T.log(2 * np.pi), axis=1)
    nll2 = -logsumexp(T.log(coeff2) + inner2, axis=1)

    inner3 = -0.5 * T.sum(T.sqr(y3 - mu3) / sig3**2 + 2 * T.log(sig3) + T.log(2 * np.pi), axis=1)
    nll3 = -logsumexp(T.log(coeff3) + inner3, axis=1)

    inner4 = -0.5 * T.sum(T.sqr(y4 - mu4) / sig4**2 + 2 * T.log(sig4) + T.log(2 * np.pi), axis=1)
    nll4 = -logsumexp(T.log(coeff4) + inner4, axis=1)

    inner5 = -0.5 * T.sum(T.sqr(y5 - mu5) / sig5**2 + 2 * T.log(sig5) + T.log(2 * np.pi), axis=1)
    nll5 = -logsumexp(T.log(coeff5) + inner5, axis=1)

    nll = nll1 + nll2 + nll3 + nll4 + nll5
    return nll
Example #3
def GMM(y, mu, sig, coeff):
    """
    Gaussian mixture model negative log-likelihood
    Parameters
    ----------
    y     : TensorVariable
    mu    : FullyConnected (Linear)
    sig   : FullyConnected (Softplus)
    coeff : FullyConnected (Softmax)
    """

    n_dim = y.ndim
    shape_y = y.shape
    y = y.reshape((-1, shape_y[-1]))
    y = y.dimshuffle(0, 1, "x")

    mu = mu.reshape((-1, mu.shape[-1] // coeff.shape[-1], coeff.shape[-1]))
    sig = sig.reshape((-1, sig.shape[-1] // coeff.shape[-1], coeff.shape[-1]))
    coeff = coeff.reshape((-1, coeff.shape[-1]))

    inner = -0.5 * T.sum(T.sqr(y - mu) / sig ** 2 + 2 * T.log(sig) + T.log(2 * np.pi), axis=-2)

    nll = -logsumexp(T.log(coeff) + inner, axis=-1)

    # Reshape the flat NLL back to y's leading dimensions: set the last
    # entry of shape_y to 1, reshape, then flatten that axis away below.
    new_dim = T.set_subtensor(shape_y[-1], 1)

    nll = nll.reshape(new_dim, ndim=n_dim)
    nll = nll.flatten(n_dim - 1)

    return nll
Example #4
def GMM(y, mu, sig, coeff):
    """
    Gaussian mixture model negative log-likelihood
    Parameters
    ----------
    y     : TensorVariable
    mu    : FullyConnected (Linear)
    sig   : FullyConnected (Softplus)
    coeff : FullyConnected (Softmax)
    """

    n_dim = y.ndim
    shape_y = y.shape
    y = y.reshape((-1, shape_y[-1]))
    y = y.dimshuffle(0, 1, 'x')

    mu = mu.reshape((-1, mu.shape[-1] // coeff.shape[-1], coeff.shape[-1]))
    sig = sig.reshape((-1, sig.shape[-1] // coeff.shape[-1], coeff.shape[-1]))
    coeff = coeff.reshape((-1, coeff.shape[-1]))

    inner = -0.5 * T.sum(
        T.sqr(y - mu) / sig**2 + 2 * T.log(sig) + T.log(2 * np.pi), axis=-2)

    nll = -logsumexp(T.log(coeff) + inner, axis=-1)
    return nll.reshape(shape_y[:-1], ndim=n_dim - 1)
def GMM(y, mu, sig, coeff):
    """
    Gaussian mixture model negative log-likelihood

    Parameters
    ----------
    y     : TensorVariable
    mu    : FullyConnected (Linear)
    sig   : FullyConnected (Softplus)
    coeff : FullyConnected (Softmax)
    """
    y = y.dimshuffle(0, 1, 'x')
    y.name = 'y_shuffled'
    mu = mu.reshape((mu.shape[0],
                     mu.shape[1]//coeff.shape[-1],
                     coeff.shape[-1]))
    mu.name = 'mu'
    sig = sig.reshape((sig.shape[0],
                       sig.shape[1]//coeff.shape[-1],
                       coeff.shape[-1]))
    sig.name = 'sig'
    a = T.sqr(y - mu)
    a.name = 'a'
    inner = -0.5 * T.sum(a / sig**2 + 2 * T.log(sig) + T.log(2 * np.pi), axis=1)
    inner.name = 'inner'
    nll = -logsumexp(T.log(coeff) + inner, axis=1)
    nll.name = 'logsum'
    return nll
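
# Added for convenience, not part of the original listing: a NumPy mirror of
# the diagonal-GMM NLL above, useful for spot-checking the Theano graph on
# small arrays; scipy's `logsumexp` stands in for the project helper.

from scipy.special import logsumexp as np_logsumexp

def gmm_nll_ref(y, mu, sig, coeff):
    """NumPy mirror of GMM: y (b, d), mu/sig (b, d*k), coeff (b, k)."""
    b, d = y.shape
    k = coeff.shape[-1]
    mu = mu.reshape(b, d, k)
    sig = sig.reshape(b, d, k)
    inner = -0.5 * np.sum((y[:, :, None] - mu) ** 2 / sig ** 2
                          + 2 * np.log(sig) + np.log(2 * np.pi), axis=1)
    return -np_logsumexp(np.log(coeff) + inner, axis=1)
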
Example #6
def BivariateGMM(y, mu, sigma, corr, coeff, binary, epsilon=1e-5):
    """
    Bivariate gaussian mixture model negative log-likelihood
    Parameters
    ----------
    """
    n_dim = y.ndim
    shape_y = y.shape
    y = y.reshape((-1, shape_y[-1]))
    y = y.dimshuffle(0, 1, 'x')

    mu_1 = mu[:,0,:]
    mu_2 = mu[:,1,:]

    sigma_1 = sigma[:,0,:]
    sigma_2 = sigma[:,1,:]

    binary = (binary+epsilon)*(1-2*epsilon)

    c_b = tensor.sum(tensor.xlogx.xlogy0(y[:,0,:], binary) +
                     tensor.xlogx.xlogy0(1 - y[:,0,:], 1 - binary), axis=1)

    inner1 = (0.5 * tensor.log(1. - corr**2 + epsilon) +
              tensor.log(sigma_1) + tensor.log(sigma_2) +
              tensor.log(2. * numpy.pi))

    Z = (((y[:,1,:] - mu_1) / sigma_1)**2 + ((y[:,2,:] - mu_2) / sigma_2)**2 -
         2. * corr * (y[:,1,:] - mu_1) * (y[:,2,:] - mu_2) / (sigma_1 * sigma_2))
    inner2 = 0.5 * (1. / (1. - corr**2 + epsilon))
    cost = -(inner1 + inner2 * Z)

    nll = -logsumexp(tensor.log(coeff) + cost, axis=1) - c_b
    return nll.reshape(shape_y[:-1], ndim=n_dim - 1)
Example #7
def BivariateGMM(y, mu, sigma, corr, coeff, binary, epsilon=1e-5):
    """
    Bivariate gaussian mixture model negative log-likelihood
    Parameters
    ----------
    """
    n_dim = y.ndim
    shape_y = y.shape
    y = y.reshape((-1, shape_y[-1]))
    y = y.dimshuffle(0, 1, 'x')

    mu_1 = mu[:,0,:]
    mu_2 = mu[:,1,:]

    sigma_1 = sigma[:,0,:]
    sigma_2 = sigma[:,1,:]

    c_b = T.sum(T.xlogx.xlogy0(y[:,0,:], binary) +
                T.xlogx.xlogy0(1 - y[:,0,:], 1 - binary), axis=1)

    inner1 = (0.5 * T.log(1. - corr**2 + epsilon) +
              T.log(sigma_1) + T.log(sigma_2) +
              T.log(2. * np.pi))

    Z = (((y[:,1,:] - mu_1) / sigma_1)**2 + ((y[:,2,:] - mu_2) / sigma_2)**2 -
         2. * corr * (y[:,1,:] - mu_1) * (y[:,2,:] - mu_2) / (sigma_1 * sigma_2))
    inner2 = 0.5 * (1. / (1. - corr**2 + epsilon))
    cost = -(inner1 + inner2 * Z)

    nll = -logsumexp(T.log(coeff) + cost, axis=1) - c_b
    return nll.reshape(shape_y[:-1], ndim=n_dim - 1)
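
# For reference (added, not from the source): the per-component bivariate
# Gaussian log-density that `cost` implements above, written out in NumPy
# with the same epsilon smoothing. The same expression appears again in the
# `BiGMM` examples below (without the epsilon terms).

def bivariate_log_density(x1, x2, mu1, mu2, s1, s2, rho, eps=1e-5):
    # log N((x1, x2); mu, Sigma) for correlation rho, matching `cost` above
    z = (((x1 - mu1) / s1) ** 2 + ((x2 - mu2) / s2) ** 2
         - 2.0 * rho * (x1 - mu1) * (x2 - mu2) / (s1 * s2))
    return -(0.5 * np.log(1.0 - rho ** 2 + eps)
             + np.log(s1) + np.log(s2) + np.log(2.0 * np.pi)
             + 0.5 * z / (1.0 - rho ** 2 + eps))
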
Example #8
def GMM(y, mu, sig, coeff):
    """
    Gaussian mixture model negative log-likelihood

    Parameters
    ----------
    y     : TensorVariable
    mu    : FullyConnected (Linear)
    sig   : FullyConnected (Softplus)
    coeff : FullyConnected (Softmax)
    """
    y = y.dimshuffle(0, 1, "x")
    mu = mu.reshape((mu.shape[0], mu.shape[1] // coeff.shape[-1], coeff.shape[-1]))
    sig = sig.reshape((sig.shape[0], sig.shape[1] // coeff.shape[-1], coeff.shape[-1]))
    inner = -0.5 * T.sum(T.sqr(y - mu) / sig ** 2 + 2 * T.log(sig) + T.log(2 * np.pi), axis=1)
    nll = -logsumexp(T.log(coeff) + inner, axis=1)
    return nll
Example #9
def BiGMM(y, mu, sig, coeff, corr, binary):
    """
    Bivariate Gaussian mixture model negative log-likelihood
    Parameters
    ----------
    y     : TensorVariable
    mu    : FullyConnected (Linear)
    sig   : FullyConnected (Softplus)
    coeff : FullyConnected (Softmax)
    corr  : FullyConnected (Tanh)
    binary: FullyConnected (Sigmoid)
    """
    y = y.dimshuffle(0, 1, 'x')

    mu = mu.reshape((mu.shape[0],
                     mu.shape[1] // coeff.shape[-1],
                     coeff.shape[-1]))

    mu_1 = mu[:, 0, :]
    mu_2 = mu[:, 1, :]

    sig = sig.reshape((sig.shape[0],
                       sig.shape[1] // coeff.shape[-1],
                       coeff.shape[-1]))

    sig_1 = sig[:, 0, :]
    sig_2 = sig[:, 1, :]

    c_b = T.sum(T.xlogx.xlogy0(y[:, 0, :], binary) +
                T.xlogx.xlogy0(1 - y[:, 0, :], 1 - binary), axis=1)

    inner1 = (0.5 * T.log(1 - corr ** 2) +
              T.log(sig_1) + T.log(sig_2) + T.log(2 * np.pi))

    z = (((y[:, 1, :] - mu_1) / sig_1)**2 + ((y[:, 2, :] - mu_2) / sig_2)**2 -
         (2. * (corr * (y[:, 1, :] - mu_1) * (y[:, 2, :] - mu_2)) / (sig_1 * sig_2)))

    inner2 = 0.5 * (1. / (1. - corr**2))
    cost = -(inner1 + (inner2 * z))

    nll = -logsumexp(T.log(coeff) + cost, axis=1) - c_b

    return nll
Example #10
def BiGMM(y, mu, sig, coeff, corr, binary):
    """
    Bivariate Gaussian mixture model negative log-likelihood
    Parameters
    ----------
    y     : TensorVariable
    mu    : FullyConnected (Linear)
    sig   : FullyConnected (Softplus)
    coeff : FullyConnected (Softmax)
    corr  : FullyConnected (Tanh)
    binary: FullyConnected (Sigmoid)
    """
    y = y.dimshuffle(0, 1, 'x')

    mu = mu.reshape(
        (mu.shape[0], mu.shape[1] // coeff.shape[-1], coeff.shape[-1]))

    mu_1 = mu[:, 0, :]
    mu_2 = mu[:, 1, :]

    sig = sig.reshape(
        (sig.shape[0], sig.shape[1] // coeff.shape[-1], coeff.shape[-1]))

    sig_1 = sig[:, 0, :]
    sig_2 = sig[:, 1, :]

    c_b = T.sum(T.xlogx.xlogy0(y[:, 0, :], binary) +
                T.xlogx.xlogy0(1 - y[:, 0, :], 1 - binary),
                axis=1)

    inner1 = (0.5 * T.log(1 - corr**2) + T.log(sig_1) + T.log(sig_2) +
              T.log(2 * np.pi))

    z = (((y[:, 1, :] - mu_1) / sig_1)**2 + ((y[:, 2, :] - mu_2) / sig_2)**2 -
         (2. * (corr * (y[:, 1, :] - mu_1) * (y[:, 2, :] - mu_2)) /
          (sig_1 * sig_2)))

    inner2 = 0.5 * (1. / (1. - corr**2))
    cost = -(inner1 + (inner2 * z))

    nll = -logsumexp(T.log(coeff) + cost, axis=1) - c_b

    return nll
Example #11
def GMM_phase(y, mu, sig, coeff):
    """
    y     : TensorVariable
    mu    : FullyConnected (Linear)
    sig   : FullyConnected (Softplus)
    coeff : FullyConnected (Softmax)
    """

    n_dim = y.ndim
    shape_y = y.shape
    print(n_dim)
    y = y.reshape((-1, shape_y[-1]))
    y = y.dimshuffle(0, 1, 'x')

    mu = mu.reshape((-1, mu.shape[-1] // coeff.shape[-1], coeff.shape[-1]))
    sig = sig.reshape((-1, sig.shape[-1] // coeff.shape[-1], coeff.shape[-1]))
    coeff = coeff.reshape((-1, coeff.shape[-1]))

    # Wrapped (circular) distance between y and mu, in [0, pi]
    inner0 = np.pi - abs(T.mod(y - mu, 2 * np.pi) - np.pi)
    inner = -0.5 * T.sum(T.sqr(inner0) / sig**2 + 2 * T.log(sig) + T.log(2 * np.pi), axis=-2)
    nll = -logsumexp(T.log(coeff) + inner, axis=-1)
    return nll.reshape(shape_y[:-1], ndim=n_dim - 1)
Example #12
def BivariateGMM(y, mu, sigma, corr, coeff, binary, epsilon=1e-5):
    """
    Bivariate gaussian mixture model negative log-likelihood
    Parameters
    ----------
    """
    n_dim = y.ndim
    shape_y = y.shape
    y = y.reshape((-1, shape_y[-1]))
    y = y.dimshuffle(0, 1, "x")

    mu_1 = mu[:, 0, :]
    mu_2 = mu[:, 1, :]

    sigma_1 = sigma[:, 0, :]
    sigma_2 = sigma[:, 1, :]

    binary = (binary + epsilon) * (1 - 2 * epsilon)

    c_b = tensor.sum(tensor.xlogx.xlogy0(y[:, 0, :], binary) + tensor.xlogx.xlogy0(1 - y[:, 0, :], 1 - binary), axis=1)

    inner1 = (
        (0.5 * tensor.log(1.0 - corr ** 2 + epsilon))
        + tensor.log(sigma_1)
        + tensor.log(sigma_2)
        + tensor.log(2.0 * numpy.pi)
    )

    Z = (
        (((y[:, 1, :] - mu_1) / sigma_1) ** 2)
        + (((y[:, 2, :] - mu_2) / sigma_2) ** 2)
        - (2.0 * (corr * (y[:, 1, :] - mu_1) * (y[:, 2, :] - mu_2)) / (sigma_1 * sigma_2))
    )
    inner2 = 0.5 * (1.0 / (1.0 - corr ** 2 + epsilon))
    cost = -(inner1 + (inner2 * Z))

    nll = -logsumexp(tensor.log(coeff) + cost, axis=1) - c_b
    return nll.reshape(shape_y[:-1], ndim=n_dim - 1)
Example #13
def GMM_phase(y, mu, sig, coeff):
    """
    y     : TensorVariable
    mu    : FullyConnected (Linear)
    sig   : FullyConnected (Softplus)
    coeff : FullyConnected (Softmax)
    """

    n_dim = y.ndim
    shape_y = y.shape
    print(n_dim)
    y = y.reshape((-1, shape_y[-1]))
    y = y.dimshuffle(0, 1, 'x')

    mu = mu.reshape((-1, mu.shape[-1] // coeff.shape[-1], coeff.shape[-1]))
    sig = sig.reshape((-1, sig.shape[-1] // coeff.shape[-1], coeff.shape[-1]))
    coeff = coeff.reshape((-1, coeff.shape[-1]))

    inner0 = np.pi - abs(T.mod(y - mu, 2 * np.pi) - np.pi)
    inner = -0.5 * T.sum(
        T.sqr(inner0) / sig**2 + 2 * T.log(sig) + T.log(2 * np.pi), axis=-2)
    nll = -logsumexp(T.log(coeff) + inner, axis=-1)
    return nll.reshape(shape_y[:-1], ndim=n_dim - 1)
Example #14
def NCGMM(y, mu, sig, coeff, p_noise=0.1):
    """
    Gaussian mixture model negative log-likelihood
    with noise collecting Gaussian

    Parameters
    ----------
    y      : TensorVariable
    mu     : FullyConnected (Linear)
    sig    : FullyConnected (Softplus)
    coeff  : FullyConnected (Softmax)
    """
    n_noise = T.cast(T.floor(coeff.shape[-1] * p_noise), "int32")
    y = y.dimshuffle(0, 1, "x")
    mu = mu.reshape((mu.shape[0], mu.shape[1] // (coeff.shape[-1] - n_noise),
                     coeff.shape[-1] - n_noise))
    sig = sig.reshape((sig.shape[0], sig.shape[1] // coeff.shape[-1], coeff.shape[-1]))
    vsig = sig[:, :, :-n_noise]
    uvsig = sig[:, :, -n_noise:]
    vinner = -0.5 * T.sum(T.sqr(y - mu) / vsig ** 2 + 2 * T.log(vsig) + T.log(2 * np.pi), axis=1)
    uvinner = -0.5 * T.sum(T.sqr(y) / uvsig ** 2 + 2 * T.log(uvsig) + T.log(2 * np.pi), axis=1)
    inner = T.concatenate([vinner, uvinner], axis=1)
    nll = -logsumexp(T.log(coeff) + inner, axis=1)
    return nll
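
# A small added note on how NCGMM splits its components (the example values
# below are assumptions, not from the source):

k_example, p_noise_example = 10, 0.1
n_noise_example = int(np.floor(k_example * p_noise_example))  # -> 1
# mu is reshaped over the k - n_noise "valid" components only, while sig
# covers all k; the trailing n_noise sigma slices score y against a
# zero-mean Gaussian (the T.sqr(y) term in uvinner).
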
Example #15
def inner_fn(x_t, s_tm1, s_tm1_is):

    x_1_t = x_1.fprop([x_t])
    x_2_t = x_2.fprop([x_1_t])
    x_3_t = x_3.fprop([x_2_t])
    x_4_t = x_4.fprop([x_3_t])

    phi_1_t = phi_1.fprop([x_4_t, s_tm1])
    phi_2_t = phi_2.fprop([phi_1_t])
    phi_3_t = phi_3.fprop([phi_2_t])
    phi_4_t = phi_4.fprop([phi_3_t])
    phi_mu_t = phi_mu.fprop([phi_4_t])
    phi_sig_t = phi_sig.fprop([phi_4_t])

    prior_1_t = prior_1.fprop([s_tm1])
    prior_2_t = prior_2.fprop([prior_1_t])
    prior_3_t = prior_3.fprop([prior_2_t])
    prior_4_t = prior_4.fprop([prior_3_t])
    prior_mu_t = prior_mu.fprop([prior_4_t])
    prior_sig_t = prior_sig.fprop([prior_4_t])

    z_t = prior.fprop([phi_mu_t, phi_sig_t])
    kl_t = kl.fprop([phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t])

    z_1_t = z_1.fprop([z_t])
    z_2_t = z_2.fprop([z_1_t])
    z_3_t = z_3.fprop([z_2_t])
    z_4_t = z_4.fprop([z_3_t])

    theta_1_t = theta_1.fprop([z_4_t, s_tm1])
    theta_2_t = theta_2.fprop([theta_1_t])
    theta_3_t = theta_3.fprop([theta_2_t])
    theta_4_t = theta_4.fprop([theta_3_t])
    theta_mu_t = theta_mu.fprop([theta_4_t])
    theta_sig_t = theta_sig.fprop([theta_4_t])
    coeff_t = coeff.fprop([theta_4_t])

    s_t = main_lstm.fprop([[x_4_t, z_4_t], [s_tm1]])

    x_t_is = T.repeat(x_t, num_sample, axis=0)
    x_1_t_is = x_1.fprop([x_t_is])
    x_2_t_is = x_2.fprop([x_1_t_is])
    x_3_t_is = x_3.fprop([x_2_t_is])
    x_4_t_is = x_4.fprop([x_3_t_is])

    phi_1_t_is = phi_1.fprop([x_4_t_is, s_tm1_is])
    phi_2_t_is = phi_2.fprop([phi_1_t_is])
    phi_3_t_is = phi_3.fprop([phi_2_t_is])
    phi_4_t_is = phi_4.fprop([phi_3_t_is])
    phi_mu_t_is = phi_mu.fprop([phi_4_t_is])
    phi_sig_t_is = phi_sig.fprop([phi_4_t_is])

    prior_1_t_is = prior_1.fprop([s_tm1_is])
    prior_2_t_is = prior_2.fprop([prior_1_t_is])
    prior_3_t_is = prior_3.fprop([prior_2_t_is])
    prior_4_t_is = prior_4.fprop([prior_3_t_is])
    prior_mu_t_is = prior_mu.fprop([prior_4_t_is])
    prior_sig_t_is = prior_sig.fprop([prior_4_t_is])

    z_t_is = prior.sample([phi_mu_t_is, phi_sig_t_is])
    z_1_t_is = z_1.fprop([z_t_is])
    z_2_t_is = z_2.fprop([z_1_t_is])
    z_3_t_is = z_3.fprop([z_2_t_is])
    z_4_t_is = z_4.fprop([z_3_t_is])

    theta_1_t_is = theta_1.fprop([z_4_t_is, s_tm1_is])
    theta_2_t_is = theta_2.fprop([theta_1_t_is])
    theta_3_t_is = theta_3.fprop([theta_2_t_is])
    theta_4_t_is = theta_4.fprop([theta_3_t_is])
    theta_mu_t_is = theta_mu.fprop([theta_4_t_is])
    theta_sig_t_is = theta_sig.fprop([theta_4_t_is])
    coeff_t_is = coeff.fprop([theta_4_t_is])
    mll = (GMM(x_t_is, theta_mu_t_is, theta_sig_t_is, coeff_t_is) +
           Gaussian(z_t_is, prior_mu_t_is, prior_sig_t_is) -
           Gaussian(z_t_is, phi_mu_t_is, phi_sig_t_is))
    mll = mll.reshape((batch_size, num_sample))
    mll = logsumexp(-mll, axis=1) - T.log(num_sample)

    s_t_is = main_lstm.fprop([[x_4_t_is, z_4_t_is], [s_tm1_is]])

    return s_t, s_t_is, kl_t, theta_mu_t, theta_sig_t, coeff_t, mll
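
# Added context, not from the source: `inner_fn` closes over layer objects
# (`x_1` ... `main_lstm`, `prior`, `kl`), `num_sample`, and `batch_size`
# defined in the enclosing script, and has the shape of a `theano.scan` step
# function: one sequence input (`x_t`) followed by two recurrent states. A
# hypothetical wiring sketch, where `x`, `s_0`, and `s_0_is` are assumed
# names for the input sequence and the initial states:

(outputs, updates) = theano.scan(fn=inner_fn,
                                 sequences=[x],
                                 outputs_info=[s_0, s_0_is,
                                               None, None, None, None, None])
(s_t, s_t_is, kl_t, theta_mu_t, theta_sig_t, coeff_t, mll) = outputs
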
def GMMdisagMulti(dim, y, mu, sig, coeff, *args):
    """
    Gaussian mixture model negative log-likelihood

    Parameters
    ----------
    y     : TensorVariable
    mu    : FullyConnected (Linear)
    sig   : FullyConnected (Softplus)
    coeff : FullyConnected (Softmax)
    """

    y1 = y[:,0].dimshuffle(0, 'x').dimshuffle(0, 1, 'x')#[:,0,:]
    y1.name = 'y1_shuffled'
    mu = mu.reshape((mu.shape[0],mu.shape[1]//coeff.shape[-1],coeff.shape[-1]))
    sig = sig.reshape((sig.shape[0],sig.shape[1]//coeff.shape[-1],coeff.shape[-1]))
    inner1 = -0.5 * T.sum(T.sqr(y1 - mu) / sig**2 + 2 * T.log(sig) + T.log(2 * np.pi), axis=1)
    inner1.name = 'inner'
    nll1 = -logsumexp(T.log(coeff) + inner1, axis=1)
    nll1.name = 'logsum'
    nll = nll1

    if (dim>1):
        mu2, sig2, coeff2 = args[0], args[1], args[2]
        y2 = y[:,1].dimshuffle(0, 'x').dimshuffle(0, 1, 'x')#[:,1,:]
        y2.name = 'y2_shuffled'
        mu2 = mu2.reshape((mu2.shape[0],mu2.shape[1]//coeff2.shape[-1],coeff2.shape[-1]))
        sig2 = sig2.reshape((sig2.shape[0],sig2.shape[1]//coeff2.shape[-1],coeff2.shape[-1]))
        inner2 = -0.5 * T.sum(T.sqr(y2 - mu2) / sig2**2 + 2 * T.log(sig2) + T.log(2 * np.pi), axis=1)
        nll2 = -logsumexp(T.log(coeff2) + inner2, axis=1)
        nll = nll + nll2
    if (dim>2):
        mu3, sig3, coeff3 = args[3], args[4], args[5]
        y3 = y[:,2].dimshuffle(0, 'x').dimshuffle(0, 1, 'x')
        y3.name = 'y3_shuffled'
        mu3 = mu3.reshape((mu3.shape[0],mu3.shape[1]//coeff3.shape[-1],coeff3.shape[-1]))
        sig3 = sig3.reshape((sig3.shape[0],sig3.shape[1]//coeff3.shape[-1],coeff3.shape[-1]))
        inner3 = -0.5 * T.sum(T.sqr(y3 - mu3) / sig3**2 + 2 * T.log(sig3) + T.log(2 * np.pi), axis=1)
        nll3 = -logsumexp(T.log(coeff3) + inner3, axis=1)
        nll = nll + nll3
    if (dim>3):
        mu4, sig4, coeff4 = args[6], args[7], args[8]
        y4 = y[:,3].dimshuffle(0, 'x').dimshuffle(0, 1, 'x')
        y4.name = 'y4_shuffled'
        mu4 = mu4.reshape((mu4.shape[0],mu4.shape[1]//coeff4.shape[-1],coeff4.shape[-1]))
        sig4 = sig4.reshape((sig4.shape[0],sig4.shape[1]//coeff4.shape[-1],coeff4.shape[-1]))
        inner4 = -0.5 * T.sum(T.sqr(y4 - mu4) / sig4**2 + 2 * T.log(sig4) + T.log(2 * np.pi), axis=1)
        nll4 = -logsumexp(T.log(coeff4) + inner4, axis=1)
        nll = nll + nll4
    if (dim>4):
        mu5, sig5, coeff5 = args[9], args[10], args[11]
        y5 = y[:,4].dimshuffle(0, 'x').dimshuffle(0, 1, 'x')
        y5.name = 'y5_shuffled'
        mu5 = mu5.reshape((mu5.shape[0],mu5.shape[1]//coeff5.shape[-1],coeff5.shape[-1]))
        sig5 = sig5.reshape((sig5.shape[0],sig5.shape[1]//coeff5.shape[-1],coeff5.shape[-1]))
        inner5 = -0.5 * T.sum(T.sqr(y5 - mu5) / sig5**2 + 2 * T.log(sig5) + T.log(2 * np.pi), axis=1)
        nll5 = -logsumexp(T.log(coeff5) + inner5, axis=1)
        nll = nll + nll5
    if (dim>5):
        mu6, sig6, coeff6 = args[12], args[13], args[14]
        y6 = y[:,5].dimshuffle(0, 'x').dimshuffle(0, 1, 'x')
        y6.name = 'y6_shuffled'
        mu6 = mu6.reshape((mu6.shape[0],mu6.shape[1]//coeff6.shape[-1],coeff6.shape[-1]))
        sig6 = sig6.reshape((sig6.shape[0],sig6.shape[1]//coeff6.shape[-1],coeff6.shape[-1]))
        inner6 = -0.5 * T.sum(T.sqr(y6 - mu6) / sig6**2 + 2 * T.log(sig6) + T.log(2 * np.pi), axis=1)
        nll6 = -logsumexp(T.log(coeff6) + inner6, axis=1)
        nll = nll + nll6
    if (dim>6):
        mu7, sig7, coeff7 = args[15], args[16], args[17]
        y7 = y[:,6].dimshuffle(0, 'x').dimshuffle(0, 1, 'x')
        y7.name = 'y7_shuffled'
        mu7 = mu7.reshape((mu7.shape[0],mu7.shape[1]//coeff7.shape[-1],coeff7.shape[-1]))
        sig7 = sig7.reshape((sig7.shape[0],sig7.shape[1]//coeff7.shape[-1],coeff7.shape[-1]))
        inner7 = -0.5 * T.sum(T.sqr(y7 - mu7) / sig7**2 + 2 * T.log(sig7) + T.log(2 * np.pi), axis=1)
        nll7 = -logsumexp(T.log(coeff7) + inner7, axis=1)
        nll = nll + nll7
    if (dim>7):
        mu8, sig8, coeff8 = args[18], args[19], args[20]
        y8 = y[:,7].dimshuffle(0, 'x').dimshuffle(0, 1, 'x')
        y8.name = 'y8_shuffled'
        mu8 = mu8.reshape((mu8.shape[0],mu8.shape[1]//coeff8.shape[-1],coeff8.shape[-1]))
        sig8 = sig8.reshape((sig8.shape[0],sig8.shape[1]//coeff8.shape[-1],coeff8.shape[-1]))
        inner8 = -0.5 * T.sum(T.sqr(y8 - mu8) / sig8**2 + 2 * T.log(sig8) + T.log(2 * np.pi), axis=1)
        nll8 = -logsumexp(T.log(coeff8) + inner8, axis=1)
        nll = nll + nll8
    return nll
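
# Added for clarity: `GMMdisagMulti` consumes `args` positionally as
# (mu_i, sig_i, coeff_i) triples, so a three-dimensional call looks like
# this (variable names are illustrative):

nll = GMMdisagMulti(3, y, mu, sig, coeff,
                    mu2, sig2, coeff2,
                    mu3, sig3, coeff3)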