Example #1
def _kl_gumbel_gumbel(dist1, dist2):
    scale_1d2 = dist1.scale / dist2.scale
    return exponential.log(dist2.scale) - exponential.log(dist1.scale) \
        + EULER * (scale_1d2 - 1.) \
        + exponential.exp((dist2.loc - dist1.loc) / dist2.scale
                          + lgamma.lgamma(scale_1d2 + 1.)) \
        - 1 + (dist1.loc - dist2.loc) / dist2.scale
Example #2
File: gamma.py Project: hvy/chainer
 def log_prob(self, x):
     logp = - lgamma.lgamma(self.k) - self.k * exponential.log(self.theta) \
         + (self.k - 1) * exponential.log(x) - x / self.theta
     xp = logp.xp
     inf = xp.full_like(logp.array, xp.inf)
     if isinstance(x, chainer.Variable):
         x = x.array
     return where.where(xp.asarray(x >= 0), logp, xp.asarray(-inf))
Example #3
 def log_prob(self, x):
     x = chainer.as_variable(x)
     logp = exponential.log(self.alpha) \
         + self.alpha * exponential.log(self.scale) \
         - (self.alpha + 1) * exponential.log(x)
     xp = logp.xp
     return where.where(
         utils.force_array(x.data >= self.scale.data),
         logp, xp.array(-xp.inf, logp.dtype))
Example #4
def _kl_pareto_pareto(dist1, dist2):
    kl = dist2.alpha * (exponential.log(dist1.scale)
                        - exponential.log(dist2.scale)) \
        + exponential.log(dist1.alpha) - exponential.log(dist2.alpha) \
        + (dist2.alpha - dist1.alpha) / dist1.alpha
    xp = kl.xp
    return where.where(
        dist1.scale.data >= dist2.scale.data,
        kl, xp.array(xp.inf, kl.dtype))
Example #5
 def log_prob(self, x):
     logp = (self.a - 1) * exponential.log(x) \
         + (self.b - 1) * exponential.log(1 - x) \
         - _lbeta(self.a, self.b)
     xp = logp.xp
     inf = xp.full_like(logp.array, xp.inf)
     if isinstance(x, chainer.Variable):
         x = x.array
     return where.where(xp.logical_and(x >= 0, x <= 1), logp, -inf)
Example #6
 def log_prob(self, x):
     x = chainer.as_variable(x)
     logp = (self.a - 1) * exponential.log(x) \
         + (self.b - 1) * exponential.log(1 - x) \
         - _lbeta(self.a, self.b)
     xp = logp.xp
     return where.where(
         utils.force_array((x.array >= 0) & (x.array <= 1)),
         logp, xp.array(-xp.inf, logp.dtype))
Example #7
def _kl_uniform_uniform(dist1, dist2):
    xp = backend.get_array_module(dist1.low)

    is_inf = xp.logical_or(dist1.high.data > dist2.high.data,
                           dist1.low.data < dist2.low.data)
    kl = - exponential.log(dist1.high - dist1.low) \
        + exponential.log(dist2.high - dist2.low)
    inf = xp.array(xp.inf, dist1.high.dtype)

    return where.where(is_inf, inf, kl)
Example #8
def _kl_uniform_uniform(dist1, dist2):
    xp = cuda.get_array_module(dist1.low)

    is_inf = xp.logical_or(dist1.high.data > dist2.high.data,
                           dist1.low.data < dist2.low.data)
    kl = - exponential.log(dist1.high - dist1.low) \
        + exponential.log(dist2.high - dist2.low)
    inf = xp.full_like(dist1.high.data, numpy.inf)

    return where.where(is_inf, inf, kl)
Example #9
 def log_prob(self, x):
     logp = exponential.log(self.lam) - self.lam * x
     xp = logp.xp
     if isinstance(x, chainer.Variable):
         x = x.array
     inf = xp.full_like(logp.array, xp.inf)
     return where.where(xp.asarray(x >= 0), logp, xp.asarray(-inf))
Example #10
 def log_prob(self, x):
     if isinstance(x, chainer.Variable):
         x = x.data
     x = x.astype(self.lam.dtype)
     xp1 = (x + 1).astype(self.lam.dtype)
     x, xp1 = utils.force_array(x), utils.force_array(xp1)
     return x * exponential.log(self.lam) - lgamma.lgamma(xp1) - self.lam
Example #11
def _kl_multivariatenormal_multivariatenormal(dist1, dist2):
    diag = diagonal.diagonal(dist1.scale_tril, axis1=-2, axis2=-1)
    logdet1 = sum_mod.sum(exponential.log(abs(diag)), axis=-1)

    diag = diagonal.diagonal(dist2.scale_tril, axis1=-2, axis2=-1)
    logdet2 = sum_mod.sum(exponential.log(abs(diag)), axis=-1)

    scale_tril_inv2 = _batch_triangular_inv(dist2.scale_tril.reshape(
        -1, dist2.d, dist2.d))
    trace = sum_mod.sum(matmul.matmul(
        scale_tril_inv2, dist1.scale_tril.reshape(-1, dist2.d, dist2.d)) ** 2,
        axis=(-1, -2)).reshape(dist1.batch_shape)

    mu = dist1.loc - dist2.loc
    mah = matmul.matmul(scale_tril_inv2, mu.reshape(-1, dist1.d, 1))
    mah = sum_mod.sum(mah ** 2, axis=-2).reshape(dist1.batch_shape)
    return logdet2 - logdet1 + 0.5 * trace + 0.5 * mah - 0.5 * dist1.d
Example #12
    def log_prob(self, x):
        if not isinstance(x, chainer.Variable):
            x = chainer.Variable(x)

        xp = backend.get_array_module(x)

        logp = broadcast.broadcast_to(
            -exponential.log(self.scale), x.shape)
        return where.where(
            utils.force_array(
                (x.data >= self.low.data) & (x.data <= self.high.data)),
            logp, xp.array(-xp.inf, logp.dtype))
Example #13
    def log_prob(self, x):
        if not isinstance(x, chainer.Variable):
            x = chainer.Variable(x)

        xp = cuda.get_array_module(x)

        logp = broadcast.broadcast_to(
            -exponential.log(self.scale), x.shape)
        return where.where(
            utils.force_array(
                (x.data >= self.low.data) & (x.data < self.high.data)),
            logp, xp.full_like(logp.array, -numpy.inf))
Example #14
def black_out(x, t, W, samples):
    """BlackOut loss function.

    BlackOut loss function is defined as

    .. math::

      -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)),

    where :math:`t` is the correct label, :math:`S` is a set of negative
    examples and :math:`p(\\cdot)` is the likelihood of a given label.
    And, :math:`p` is defined as

    .. math::

       p(y) = \\frac{\\exp(W_y^\\top x)}{
       \\sum_{s \\in samples} \\exp(W_s^\\top x)}.

    Args:
        x (~chainer.Variable): Batch of input vectors.
        t (~chainer.Variable): Vector of ground truth labels.
        W (~chainer.Variable): Weight matrix.
        samples (~chainer.Variable): Negative samples.

    Returns:
        ~chainer.Variable: Loss value.

    See: `BlackOut: Speeding up Recurrent Neural Network Language Models With \
         Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_

    .. seealso:: :class:`~chainer.links.BlackOut`.

    """

    batch_size = x.shape[0]

    neg_emb = embed_id.embed_id(samples, W)
    neg_y = matmul.batch_matmul(neg_emb, x)
    neg_y = reshape.reshape(neg_y, neg_y.shape[:-1])

    pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1)
    pos_y = matmul.batch_matmul(pos_emb, x)
    pos_y = reshape.reshape(pos_y, pos_y.shape[:-1])

    logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1)
    blogz, bneg_y = broadcast.broadcast(
        reshape.reshape(logz, (batch_size, 1)), neg_y)
    ny = exponential.log(1 - exponential.exp(bneg_y - blogz))
    py = reshape.reshape(pos_y, (batch_size,))
    loss = py - logz + _sum.sum(ny, axis=1)
    return -_sum.sum(loss) / batch_size
Example #15
    def __init__(self, p=None, logit=None):
        super(Bernoulli, self).__init__()
        if not (p is None) ^ (logit is None):
            raise ValueError(
                "Either `p` or `logit` (not both) must have a value.")

        with chainer.using_config('enable_backprop', True):
            if p is None:
                self.logit = chainer.as_variable(logit)
                self.p = sigmoid.sigmoid(self.logit)
            else:
                self.p = chainer.as_variable(p)
                self.logit = exponential.log(self.p) \
                    - logarithm_1p.log1p(-self.p)
Example #16
    def __init__(self, p=None, **kwargs):
        logit = None
        if kwargs:
            logit, = argument.parse_kwargs(
                kwargs, ('logit', logit))
        if not (p is None) ^ (logit is None):
            raise ValueError(
                "Either `p` or `logit` (not both) must have a value.")

        with chainer.using_config('enable_backprop', True):
            if p is None:
                logit = chainer.as_variable(logit)
                self.__log_p = log_softmax.log_softmax(logit, axis=-1)
                self.__p = exponential.exp(self.__log_p)
            else:
                self.__p = chainer.as_variable(p)
                self.__log_p = exponential.log(self.__p)
Example #17
    def __init__(self, loc, scale=None, **kwargs):
        super(Normal, self).__init__()
        log_scale = None
        if kwargs:
            log_scale, = argument.parse_kwargs(
                kwargs, ('log_scale', log_scale))
        if not (scale is None) ^ (log_scale is None):
            raise ValueError(
                "Either `scale` or `log_scale` (not both) must have a value.")
        self.loc = chainer.as_variable(loc)

        with chainer.using_config('enable_backprop', True):
            if scale is None:
                self.__log_scale = chainer.as_variable(log_scale)
                self.__scale = exponential.exp(self.log_scale)
            else:
                self.__scale = chainer.as_variable(scale)
                self.__log_scale = exponential.log(self.scale)
Example #18
 def log_p(self):
     if self.__p is not None:
         return exponential.log(self.__p)
     else:
         return log_softmax.log_softmax(self.__logit, axis=-1)
Example #19
 def _log_lam(self):
     return exponential.log(self.lam)
Example #20
def _kl_geometric_geometric(dist1, dist2):
    return (1 / dist1.p - 1) \
        * (exponential.log(1 - dist1.p) - exponential.log(1 - dist2.p)) \
        + exponential.log(dist1.p) - exponential.log(dist2.p)
Example #21
 def log_prob(self, x):
     return (x - 1) * exponential.log(1 - self.p) + exponential.log(self.p)
Example #22
 def log_cdf(self, x):
     return exponential.log(self.cdf(x))
Example #23
 def log_scale(self):
     if self.__log_scale is not None:
         return chainer.as_variable(self.__log_scale)
     else:
         return exponential.log(self.scale)
Example #24
 def logit(self):
     if self.__logit is not None:
         return chainer.as_variable(self.__logit)
     else:
         return exponential.log(self.p) - logarithm_1p.log1p(-self.p)
Example #25
 def log_p(self):
     if self.__p is not None:
         return exponential.log(self.__p)
     else:
         return log_softmax.log_softmax(self.__logit, axis=-1)
Example #26
 def log_prob(self, x):
     logx = exponential.log(x)
     return LOGPROBC - self._log_sigma - logx \
         - (0.5 * (logx - self.mu) ** 2 / self.sigma ** 2)
Example #27
def black_out(x, t, W, samples, reduce='mean'):
    """BlackOut loss function.

    BlackOut loss function is defined as

    .. math::

      -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)),

    where :math:`t` is the correct label, :math:`S` is a set of negative
    examples and :math:`p(\\cdot)` is the likelihood of a given label.
    And, :math:`p` is defined as

    .. math::

       p(y) = \\frac{\\exp(W_y^\\top x)}{
       \\sum_{s \\in samples} \\exp(W_s^\\top x)}.

    The output is a variable whose value depends on the value of
    the option ``reduce``. If it is ``'no'``, it holds the
    elementwise loss values. If it is ``'mean'``, this function takes
    the mean of the loss values.

    Args:
        x (:class:`~chainer.Variable` or :ref:`ndarray`):
            Batch of input vectors.
            Its shape should be :math:`(N, D)`.
        t (:class:`~chainer.Variable` or :ref:`ndarray`):
            Vector of ground truth labels.
            Its shape should be :math:`(N,)`. Each element :math:`v`
            should satisfy :math:`0 \\leq v < V` or :math:`v = -1`,
            where :math:`V` is the number of label types.
        W (:class:`~chainer.Variable` or :ref:`ndarray`):
            Weight matrix.
            Its shape should be :math:`(V, D)`
        samples (~chainer.Variable): Negative samples.
            Its shape should be :math:`(N, S)` where :math:`S` is
            the number of negative samples.
        reduce (str): Reduction option. Its value must be either
            ``'no'`` or ``'mean'``. Otherwise,
            :class:`ValueError` is raised.

    Returns:
        ~chainer.Variable:
            A variable object holding loss value(s).
            If ``reduce`` is ``'no'``, the output variable holds an
            array whose shape is :math:`(N,)` .
            If it is ``'mean'``, it holds a scalar.

    See: `BlackOut: Speeding up Recurrent Neural Network Language Models With \
         Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_

    .. seealso:: :class:`~chainer.links.BlackOut`.

    """

    batch_size = x.shape[0]

    neg_emb = embed_id.embed_id(samples, W)
    neg_y = matmul.matmul(neg_emb, x[:, :, None])
    neg_y = reshape.reshape(neg_y, neg_y.shape[:-1])

    pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1)
    pos_y = matmul.matmul(pos_emb, x[:, :, None])
    pos_y = reshape.reshape(pos_y, pos_y.shape[:-1])

    logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1)
    blogz, bneg_y = broadcast.broadcast(reshape.reshape(logz, (batch_size, 1)),
                                        neg_y)
    ny = exponential.log(1 - exponential.exp(bneg_y - blogz))
    py = reshape.reshape(pos_y, (batch_size, ))
    loss = -(py - logz + _sum.sum(ny, axis=1))
    if reduce == 'mean':
        loss = average.average(loss)
    return loss
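
A minimal usage sketch (not part of the collected examples; the sizes and random data below are illustrative assumptions) showing how chainer.functions.black_out can be called with the signature above. In practice the negative samples are usually drawn by the chainer.links.BlackOut link rather than built by hand.

import numpy as np
import chainer
import chainer.functions as F

# Illustrative sizes (assumed): batch N, feature dim D, vocabulary V, negatives S.
N, D, V, S = 4, 8, 100, 5
x = chainer.Variable(np.random.randn(N, D).astype(np.float32))   # input vectors, shape (N, D)
t = np.random.randint(0, V, size=(N,)).astype(np.int32)          # ground-truth labels, shape (N,)
W = chainer.Variable(np.random.randn(V, D).astype(np.float32))   # weight matrix, shape (V, D)
samples = np.random.randint(0, V, size=(N, S)).astype(np.int32)  # negative samples, shape (N, S)

mean_loss = F.black_out(x, t, W, samples)                 # scalar loss (reduce='mean', the default)
per_sample = F.black_out(x, t, W, samples, reduce='no')   # per-sample losses, shape (N,)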
Example #28
 def _log_sigma(self):
     return exponential.log(self.sigma)
Example #29
 def log_prob(self, x):
     return (-numpy.log(numpy.pi) + exponential.log(self.scale) -
             exponential.log((x - self.loc)**2 + self.scale**2))
Example #30
 def entropy(self):
     return exponential.log(4 * numpy.pi * self.scale)
Example #31
 def entropy(self):
     return self.k + exponential.log(self.theta) + lgamma.lgamma(self.k) \
         + (1 - self.k) * digamma.digamma(self.k)
Example #32
 def log_survival_function(self, x):
     return exponential.log(self.survival_function(x))
Example #33
 def entropy(self):
     return 0.5 - LOGPROBC + exponential.log(self.sigma) + self.mu
Example #34
 def log_p(self):
     return exponential.log(self.p)
Example #35
def _kl_bernoulli_bernoulli(dist1, dist2):
    return (dist1.logit - dist2.logit) * (dist1.p - 1.) \
        - exponential.log(exponential.exp(-dist1.logit) + 1) \
        + exponential.log(exponential.exp(-dist2.logit) + 1)
Example #36
def _kl_log_normal_log_normal(dist1, dist2):
    return 0.5 * ((dist1.mu - dist2.mu) ** 2 +
                  dist1.sigma ** 2) / dist2.sigma ** 2 - 0.5 \
        + exponential.log(dist2.sigma) - exponential.log(dist1.sigma)
Example #37
 def log_prob(self, x):
     return - exponential.log(broadcast.broadcast_to(self.scale, x.shape)) \
         - 0.5 * (x - broadcast.broadcast_to(self.loc, x.shape)) ** 2 \
         / broadcast.broadcast_to(self.scale, x.shape) ** 2 + LOGPROBC
Example #38
 def _log_alpha(self):
     return exponential.log(self.alpha)
Example #39
def _kl_normal_normal(dist1, dist2):
    return exponential.log(dist2.scale) - exponential.log(dist1.scale) \
        + 0.5 * (dist1.scale ** 2 + (dist1.loc - dist2.loc) ** 2) \
        / dist2.scale ** 2 - 0.5
Example #40
def _kl_laplace_laplace(dist1, dist2):
    diff = abs(dist1.loc - dist2.loc)
    return exponential.log(dist2.scale) - exponential.log(dist1.scale) \
        + diff / dist2.scale \
        + dist1.scale / dist2.scale * exponential.exp(- diff / dist1.scale) - 1
Example #41
def infer_initial_states_sctrnn(params,
                                old_model,
                                testing_data,
                                num_timesteps=0,
                                epochs=None,
                                start_is='mean',
                                error_computation='standard',
                                single_recognition=False,
                                hyp_prior=None,
                                external_signal_variance=-1,
                                x_start=None,
                                use_init_state_loss=True):

    # each trajectory is handled as a separate "class", infer initial states per class
    num_classes = testing_data.shape[0]
    # full number of timesteps
    num_timesteps_orig = int(testing_data.shape[1] / params.num_io)
    # timesteps to use for inference
    if num_timesteps == 0:
        num_timesteps = num_timesteps_orig

    gpu_id = 0  # -1 for CPU
    # Determine whether CPU or GPU should be used
    xp = np
    if gpu_id >= 0 and cuda.available:
        print("Use GPU!")
        cuda.get_device_from_id(gpu_id).use()
        xp = cuda.cupy
    else:
        print("Use CPU!")
        gpu_id = -1

    c = []
    num_samples_per_class = 1
    for i in range(num_classes):
        for j in range(num_samples_per_class):
            c.append(i)
    c_train = xp.array(c)

    save_location = "."
    if os.path.exists("/media/AnjaDataDrive"):
        save_location = "/media/AnjaDataDrive"
    save_location += "/results"

    now = datetime.datetime.now()
    expStr = str(now.year).zfill(4) + "-" + str(
        now.month).zfill(2) + "-" + str(now.day).zfill(2) + "_" + str(
            now.hour).zfill(2) + "-" + str(now.minute).zfill(2) + "_" + str(
                now.microsecond).zfill(7) + "_inference"
    save_dir = os.path.join(save_location, expStr)
    print(save_dir)

    pathlib.Path(save_dir).mkdir(parents=True, exist_ok=True)

    save_interval = 100  # interval for testing the production capability of the network and saving initial state information
    save_model_interval = 100  # interval for storing the learned model

    # Should better already be done outside this method
    # try:
    #     x_train = range2norm(x_train_orig, params.norm_offset, params.norm_range, minmax = params.minmax)
    #     x_train = xp.float32(x_train)
    #     # N = len(x_train)
    # except:
    #     print("No normalization applicable...")
    #     x_train = testing_data

    # CUT PART OF THE TRAINING SIGNAL (COMPLETION TASK)
    testing_data_cut = testing_data[:, 0:params.num_io * num_timesteps]

    plot_results(xp.copy(testing_data_cut[0::num_samples_per_class]),
                 num_timesteps,
                 os.path.join(save_dir, 'target_trajectories.png'),
                 params.num_io,
                 twoDim=True)

    info = "same trajectories (original #timesteps: " + str(
        num_timesteps_orig) + "), used #timesteps: " + str(num_timesteps)

    # copy network model and prepare it for backpropagation inference
    params.learn_weights = False
    params.learn_bias = False
    params.epochs = epochs
    max_epochs = 500
    if params.epochs:
        epoch_array_size = params.epochs
    else:
        epoch_array_size = max_epochs

    model = SCTRNN(params.num_io,
                   params.num_c,
                   params.tau_c,
                   num_classes,
                   init_state_init=params.init_state_init,
                   init_state_learning=params.learn_init_states,
                   weights_learning=params.learn_weights,
                   bias_learning=params.learn_bias,
                   tau_learning=params.learn_tau,
                   pretrained_model=old_model)
    #model.hyp_prior = params.hyp_prior
    #model.external_signal_variance = params.external_signal_variance
    if hyp_prior is not None:
        model.hyp_prior = hyp_prior
        params.hyp_prior = hyp_prior
    if external_signal_variance is None or external_signal_variance >= 0:
        model.external_signal_variance = external_signal_variance
        params.external_signal_variance = external_signal_variance
    params.lr = 0.01

    with open(os.path.join(save_dir, "info.txt"), 'w') as f:
        f.write(params.get_parameter_string())
        f.write("\n")
        f.write(info)
        f.write("\n")
    f.close()

    if start_is == 'mean':
        model.set_initial_states_mean()
    elif start_is == 'zero':
        model.set_initial_states_zero()
    else:
        model.initial_states.W.array = start_is
    #model.apply_estimated_variance = True
    model.set_init_state_learning(c_train)

    if gpu_id >= 0:
        model.to_gpu(gpu_id)
        testing_data = cuda.to_gpu(testing_data)
        x_start = cuda.to_gpu(x_start)

    save_network(save_dir,
                 params=params,
                 model=model,
                 model_filename="network-initial")

    # Optimizer
    optimizer = optimizers.Adam(params.lr)
    optimizer.setup(model)
    #optimizer.add_hook(chainer.optimizer.WeightDecay(0))

    history_init_state_var = np.zeros((epoch_array_size + 1, ))
    history_init_state_var[0] = np.mean(
        np.var(model.initial_states.W.array, axis=0))
    history_generation_error_proactive = np.empty((num_classes, ),
                                                  dtype=object)
    history_generation_error_reactive = np.empty((num_classes, ), dtype=object)
    history_training_error = np.zeros((epoch_array_size + 1, ))
    history_training_variance_estimation = np.zeros(
        (epoch_array_size + 1, num_classes))

    history_initial_states = []

    likelihood_per_epoch = []

    print("actual variance of init_states_0: " +
          str(history_init_state_var[0]))

    # Evaluate the performance of the untrained network
    test_batch_size = np.min(
        [model.initial_states.W.array.shape[0], testing_data.shape[0]])
    res, resv, resm = model.generate(model.initial_states.W.array,
                                     num_timesteps_orig,
                                     add_variance_to_output=0,
                                     x_start=x_start)
    results = res  #cuda.to_cpu(res)

    for i in range(num_classes):
        generation_error = chainer.functions.mean_squared_error(
            results[i, :], testing_data[i, :]).array.tolist()
        history_generation_error_proactive[i] = [generation_error]

        with open(os.path.join(save_dir, "evaluation.txt"), 'a') as f:
            f.write("before learning: pattern generation error (proactive): " +
                    str(history_generation_error_proactive[i]) + "\n")

    plot_results(xp.copy(results),
                 num_timesteps_orig,
                 os.path.join(save_dir, "proactive_before-learning"),
                 params.num_io,
                 twoDim=True)

    res, resv, resm, pe, wpe, respost = model.generate(
        model.initial_states.W.array,
        num_timesteps_orig,
        external_input=xp.asarray(testing_data[0::num_samples_per_class, :]),
        add_variance_to_output=0,
        x_start=x_start)
    results = res  #cuda.to_cpu(res)

    for i in range(num_classes):
        generation_error = chainer.functions.mean_squared_error(
            results[i, :], testing_data[i, :]).array.tolist()
        history_generation_error_reactive[i] = [generation_error]

        with open(os.path.join(save_dir, "evaluation.txt"), 'a') as f:
            f.write("before learning: pattern generation error (reactive): " +
                    str(history_generation_error_reactive[i]) + "\n")

    plot_results(xp.copy(results),
                 num_timesteps_orig,
                 os.path.join(save_dir, "reactive_before-learning"),
                 params.num_io,
                 twoDim=True)

    # arrays for tracking likelihood and determining stop condition
    all_mean_diffs = []
    all_std_diffs = []
    m1s = []
    s1s = []
    # tmp_epoch_marker = 0
    # conv_eval_interval = 1000 # the length of the interval to consider for determining convergence

    for epoch in range(1, epoch_array_size + 1):
        epochStart = time.time()

        outv = np.zeros((num_timesteps, ))

        # permutate samples in each epoch so that they are randomly ordered
        perm = np.random.permutation(testing_data_cut.shape[0])

        # here, one batch equals the full training set
        x_batch = xp.asarray(testing_data_cut[perm])
        x_batch = x_batch + 0.01 * xp.random.randn(
            x_batch.shape[0], x_batch.shape[1]).astype('float32')
        model.set_init_state_learning(c_train[perm])

        mean_init_states = chainer.Variable(xp.zeros((), dtype=xp.float32))
        mean_init_states = chainer.functions.average(model.initial_states.W,
                                                     axis=0)  #keepdims=True
        #mean_init_states = xp.mean(c0.array,axis=0) # using this instead causes no difference in resulting gradient of c0

        # initialize error
        acc_loss = chainer.Variable(xp.zeros(
            (), dtype=xp.float32))  # for weight backprop
        acc_init_loss = chainer.Variable(xp.zeros(
            (), dtype=xp.float32))  # for init states backprop
        err = xp.zeros(())  # for evaluation only

        # clear gradients from previous batch
        model.cleargrads()
        # clear output and variance estimations from previous batch
        model.reset_current_output()

        t = 0  # iterate through time
        x_t = x_batch[:, params.num_io * t:params.num_io * (t + 1)]
        # next time step to be predicted (for evaluation)
        x_t1 = x_batch[:, params.num_io * (t + 1):params.num_io * (t + 2)]
        # x_t = xp.reshape(x_batch[0][t,:], (1, params.num_io))
        # x_t1 = xp.reshape(x_batch[0][t+1,:], (1, params.num_io))
        # for i in range(1, params.batch_size):
        #     x_t = np.concatenate((x_t, xp.reshape(x_batch[i][t,:], (1,params.num_io))),axis=0)
        #     x_t1 = np.concatenate((x_t1, xp.reshape(x_batch[i][t+1,:], (1,params.num_io))),axis=0)

        # execute first forward step
        u_h, y, v = model(
            x_t, None
        )  # initial states of u_h are set automatically according to model.classes

        # noisy output estimation
        #y_out = y.array + xp.sqrt(v.array) * xp.random.randn()

        # compute prediction error, averaged over batch
        if error_computation == 'standard':
            # compare network prediction to ground truth
            loss_i = chainer.functions.gaussian_nll(chainer.Variable(x_t1), y,
                                                    exponential.log(v))
        elif error_computation == 'integrated':
            # compare network prediction to posterior of perception
            loss_i = chainer.functions.gaussian_nll(model.current_x, y,
                                                    exponential.log(v))
        acc_loss += loss_i

        # compute error for evaluation purposes
        err += chainer.functions.mean_squared_error(
            chainer.Variable(x_t), y).array.reshape(()) * params.batch_size

        outv[t] = xp.mean(v.array)

        # rollout trajectory
        for t in range(1, num_timesteps - 1):
            # current time step
            x_t = x_batch[:, params.num_io * t:params.num_io * (t + 1)]
            # next time step to be predicted (for evaluation)
            x_t1 = x_batch[:, params.num_io * (t + 1):params.num_io * (t + 2)]

            u_h, y, v = model(x_t, u_h)

            # noisy output estimation
            #y_out = y.array + xp.sqrt(v.array) * xp.random.randn()

            # compute error for backprop for weights
            if error_computation == 'standard':
                loss_i = chainer.functions.gaussian_nll(
                    chainer.Variable(x_t1), y, exponential.log(v))
            elif error_computation == 'integrated':
                integrated_x = params.training_external_contrib * chainer.Variable(
                    x_t1) + (1 - params.training_external_contrib) * (
                        y + chainer.functions.sqrt(v) * xp.random.randn())
                loss_i = chainer.functions.gaussian_nll(
                    integrated_x, y, exponential.log(v))
            acc_loss += loss_i

            # compute error for evaluation purposes
            err += chainer.functions.mean_squared_error(
                chainer.Variable(x_t), y).array.reshape(()) * params.batch_size

            outv[t] = xp.mean(v.array)

        # for each training sequence of this batch: compute loss for maintaining desired initial state variance
        if not single_recognition and use_init_state_loss:
            for s in range(len(c_train)):
                if gpu_id >= 0:
                    acc_init_loss += chainer.functions.gaussian_nll(
                        model.initial_states()[model.classes][s],
                        mean_init_states,
                        xp.ones(mean_init_states.shape) * exponential.log(
                            cuda.to_gpu(params.init_state_var, device=gpu_id)))
                else:
                    acc_init_loss += chainer.functions.gaussian_nll(
                        model.initial_states()[model.classes][s],
                        mean_init_states,
                        exponential.log(params.init_state_var))

            # compute gradients
            # (gradients from L_out and L_init are summed up)
            # gradient of initial states equals:
            # 1/params.init_state_var * (c0[cl]-mean_init_states).array
            acc_init_loss.backward()
        else:
            epochBatchProcessed = time.time()

        acc_loss.backward()

        print("update")
        optimizer.update()

        print("Done epoch " + str(epoch))
        error = err / params.batch_size / num_timesteps
        mean_estimated_var = xp.mean(outv)
        history_training_error[epoch] = error
        history_training_variance_estimation[epoch, :] = mean_estimated_var

        print("train MSE = " + str(error) + "\nmean estimated var: " +
              str(mean_estimated_var))
        print("init_states = [" + str(model.initial_states.W.array[0][0]) +
              "," + str(model.initial_states.W.array[0][1]) + "...], var: " +
              str(np.mean(np.var(model.initial_states.W.array, axis=0))) +
              ", accs: " + str(acc_loss) + " + " + str(acc_init_loss))

        likelihood_per_epoch.append(
            np.float64(acc_loss.array + acc_init_loss.array))

        history_init_state_var[epoch] = np.mean(
            np.var(model.initial_states.W.array, axis=0))

        with open(os.path.join(save_dir, "evaluation.txt"), 'a') as f:
            f.write("epoch: " + str(epoch) + "\n")
            f.write("train MSE = " + str(error) + "\nmean estimated var: " +
                    str(mean_estimated_var))
            f.write("initial state var: " +
                    str(history_init_state_var[epoch]) + ", precision loss: " +
                    str(acc_loss) + ", variance loss: " + str(acc_init_loss) +
                    "\ninit states:\n")
            for i in range(num_classes):
                f.write("\t[" + str(model.initial_states.W[i][0]) + "," +
                        str(model.initial_states.W[i][1]) + "...]\n")
        f.close()

        if epoch % save_interval == 1 or epoch == params.epochs:
            # evaluate proactive generation
            res, resv, resm, u_h_history = model.generate(
                model.initial_states.W.array,
                num_timesteps_orig,
                add_variance_to_output=0,
                additional_output='activations',
                x_start=x_start)
            results = res  #cuda.to_cpu(res)

            plot_results(xp.copy(results),
                         num_timesteps_orig,
                         os.path.join(
                             save_dir, "proactive_epoch-" +
                             str(epoch).zfill(len(str(epochs)))),
                         params.num_io,
                         twoDim=True)

            for i in range(num_classes):
                generation_error = chainer.functions.mean_squared_error(
                    results[i, :], testing_data[i, :]).array.tolist()
                history_generation_error_proactive[i].append(generation_error)
                with open(os.path.join(save_dir, "evaluation.txt"), 'a') as f:
                    f.write("pattern generation error (proactive): " +
                            str(generation_error) + "\n")
                f.close()

            # evaluate reactive generation
            res, resv, resm, pe, wpe, u_h_history, respost = model.generate(
                model.initial_states.W.array,
                num_timesteps_orig,
                external_input=xp.asarray(
                    testing_data[0::num_samples_per_class, :]),
                additional_output='activations',
                x_start=x_start)
            results = res  #cuda.to_cpu(res)

            plot_results(xp.copy(results),
                         num_timesteps_orig,
                         os.path.join(
                             save_dir, "reactive_epoch-" +
                             str(epoch).zfill(len(str(epochs)))),
                         params.num_io,
                         twoDim=True)

            for i in range(test_batch_size):
                generation_error = chainer.functions.mean_squared_error(
                    results[i, :], testing_data[i, :]).array.tolist()
                history_generation_error_reactive[i].append(generation_error)
                with open(os.path.join(save_dir, "evaluation.txt"), 'a') as f:
                    f.write("pattern generation error (reactive): " +
                            str(generation_error) + "\n")
                f.close()

        if epoch % save_model_interval == 1 or epoch == params.epochs:
            save_network(save_dir,
                         params,
                         model,
                         model_filename="network-epoch-" +
                         str(epoch).zfill(len(str(epochs))))
            np.save(os.path.join(save_dir, "history_init_state_var"),
                    np.array(history_init_state_var))
            np.save(
                os.path.join(save_dir, "history_generation_error_proactive"),
                np.array(history_generation_error_proactive))
            np.save(
                os.path.join(save_dir, "history_generation_error_reactive"),
                np.array(history_generation_error_reactive))
            np.save(os.path.join(save_dir, "history_training_error"),
                    np.array(history_training_error))
            np.save(
                os.path.join(save_dir, "history_training_variance_estimation"),
                np.array(history_training_variance_estimation))

            fig = plt.figure()
            ax = fig.add_subplot(111)
            ax.plot(np.arange(0, len(history_init_state_var)),
                    history_init_state_var)
            plt.title("init state variance")
            fig.savefig(os.path.join(save_dir, "init-state-var"))
            plt.close()

            fig = plt.figure()
            ax = fig.add_subplot(121)
            for i in range(num_classes):
                ax.plot(
                    np.arange(0, len(history_generation_error_proactive[i])) *
                    save_interval, history_generation_error_proactive[i])
            ax = fig.add_subplot(122)
            for i in range(num_classes):
                ax.plot(
                    np.arange(0, len(history_generation_error_reactive[i])) *
                    save_interval,
                    history_generation_error_reactive[i],
                    label=str(i))
            plt.title("generation error (proactive / reactive)")
            plt.legend()
            fig.savefig(os.path.join(save_dir, "generation-error"))
            plt.close()

            plt.figure()
            plt.plot(np.arange(len(all_std_diffs)),
                     all_std_diffs,
                     'bo',
                     label='std diff')
            plt.plot(np.arange(len(all_mean_diffs)),
                     all_mean_diffs,
                     'ro',
                     label='mean diff')
            plt.legend()
            plt.savefig(os.path.join(save_dir, 'convergence-condition.png'))
            plt.close()

        history_initial_states.append(model.initial_states.W.array.copy())

        # if no epoch number is decided, stop when error is below a threshold
        if not epochs:
            if error < 0.01:
                break

    save_network(save_dir, params, model, model_filename="network-final")

    return model.initial_states, history_initial_states, results, resm, save_dir
Example #42
 def log_prob(self, x):
     return - _lbeta(self.alpha) \
         + sum_mod.sum((self.alpha - 1) * exponential.log(x), axis=-1)
Example #43
 def entropy(self):
     return exponential.log(self.scale)
Example #44
def _kl_bernoulli_bernoulli(dist1, dist2):
    return (dist1.logit - dist2.logit) * (dist1.p - 1.) \
        - exponential.log(exponential.exp(-dist1.logit) + 1) \
        + exponential.log(exponential.exp(-dist2.logit) + 1)
Example #45
def _triangular_logdet(x):
    diag = diagonal.diagonal(x, axis1=-2, axis2=-1)
    return sum_mod.sum(exponential.log(abs(diag)), axis=-1)
Example #46
def _kl_gamma_gamma(dist1, dist2):
    return (dist1.k - dist2.k) * digamma.digamma(dist1.k) \
        - (lgamma.lgamma(dist1.k) - lgamma.lgamma(dist2.k)) \
        + dist2.k\
        * (exponential.log(dist2.theta) - exponential.log(dist1.theta)) \
        + dist1.k * (dist1.theta / dist2.theta - 1)
Example #47
 def _log_scale(self):
     return exponential.log(self.scale)
Example #48
 def log_prob(self, x):
     return (
         - lgamma.lgamma(self._half_k)
         - self._half_k * numpy.log(2.)
         + (self._half_k - 1) * exponential.log(x)
         - 0.5 * x)
Example #49
 def log_prob(self, x):
     logx = exponential.log(x)
     return LOGPROBC - exponential.log(self.sigma) - logx \
         - (0.5 * (logx - self.mu) ** 2 / self.sigma ** 2)
Example #50
 def _log_alpha(self):
     return exponential.log(self.alpha)
Example #51
 def _log_scale(self):
     return exponential.log(self.scale)
Example #52
 def entropy(self):
     return exponential.log(self.scale)
Example #53
 def log_prob(self, x):
     scale = self.scale
     return - exponential.log(2 * scale) - abs(x - self.loc) / scale
Example #54
        # next time step to be predicted (for evaluation)
        x_t1 = x_batch[:, p.num_io * (t + 1):p.num_io * (t + 2)]

        # execute first forward step
        u_h, y, v = model(
            xp.copy(x_t), None
        )  # initial states of u_h are set automatically according to model.classes

        # noisy output estimation
        # y_out = y.array + xp.sqrt(v.array) * xp.random.randn()

        # compute prediction error, averaged over batch
        if prediction_error_type == 'standard':
            # compare network prediction to ground truth
            loss_i = chainer.functions.gaussian_nll(chainer.Variable(x_t1), y,
                                                    exponential.log(v))
        elif prediction_error_type == 'integrated':
            # compare network prediction to posterior of perception
            loss_i = chainer.functions.gaussian_nll(model.current_x, y,
                                                    exponential.log(v))
        acc_loss += loss_i

        # compute error for evaluation purposes
        err += chainer.functions.mean_squared_error(
            chainer.Variable(x_t), y).array.reshape(()) * p.batch_size

        estimated_variance[t] = xp.mean(v.array)

        # rollout trajectory
        for t in range(1, num_timesteps - 1):
            # current time step
Example #55
 def entropy(self):
     return 1. + exponential.log(2 * self.scale)
Example #56
def _kl_laplace_laplace(dist1, dist2):
    diff = abs(dist1.loc - dist2.loc)
    return exponential.log(dist2.scale) - exponential.log(dist1.scale) \
        + diff / dist2.scale \
        + dist1.scale / dist2.scale * exponential.exp(- diff / dist1.scale) - 1
Example #57
 def logit(self):
     if self.__logit is not None:
         return chainer.as_variable(self.__logit)
     else:
         return exponential.log(self.p) - logarithm_1p.log1p(-self.p)
Example #58
 def entropy(self):
     return 1. + exponential.log(2 * self.scale)
Example #59
 def log_prob(self, x):
     bl = broadcast.broadcast_to(self.loc, x.shape)
     bs = broadcast.broadcast_to(self.scale, x.shape)
     return - exponential.log(2 * bs) - abs(x - bl) / bs
Example #60
 def log_prob(self, x):
     return - _lbeta(self.alpha) \
         + sum_mod.sum((self.alpha - 1) * exponential.log(x), axis=-1)