Example #1
def adam(data,
         paramvec,
         loss,
         batch_size,
         rate,
         epochs=1,
         b1=0.9,
         b2=0.999,
         epsilon=1e-8,
         callback=None):
    m = np.zeros_like(paramvec)
    v = np.zeros_like(paramvec)
    vals = []
    i = 0

    for epoch in range(epochs):
        for minibatch in make_batches(batch_size, data):
            val, g = vgrad(loss)(paramvec, *minibatch)
            m = (1. - b1) * g + b1 * m          # first-moment (mean) estimate
            v = (1. - b2) * g**2 + b2 * v       # second-moment (uncentered variance) estimate
            mhat = m / (1 - b1**(i + 1))        # bias-corrected first moment
            vhat = v / (1 - b2**(i + 1))        # bias-corrected second moment
            paramvec -= rate * mhat / (np.sqrt(vhat) + epsilon)
            vals.append(val)
            i += 1
        if callback: callback(epoch, paramvec, vals)
    return paramvec
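
These snippets leave vgrad and make_batches undefined. Below is a minimal sketch of the helpers they are assumed to rely on, plus a toy call to adam; taking vgrad to be autograd's value_and_grad and make_batches to be a simple minibatch splitter is an assumption, not something the examples state.

import autograd.numpy as np
from autograd import value_and_grad as vgrad

def make_batches(batch_size, data):
    # Split every array in data along axis 0 into minibatches of batch_size examples.
    num_examples = len(data[0])
    for start in range(0, num_examples, batch_size):
        yield tuple(x[start:start + batch_size] for x in data)

# Toy usage: fit a least-squares parameter vector on random data.
X, y = np.random.randn(100, 3), np.random.randn(100)
loss = lambda w, Xb, yb: np.mean((np.dot(Xb, w) - yb)**2)
w_fit = adam((X, y), np.zeros(3), loss, batch_size=10, rate=0.01, epochs=5)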
Example #2
 def EM_update(params):
     natural_params = list(map(np.log, params))
     loglike, E_stats = vgrad(log_partition_function)(natural_params,
                                                      data)  # E step
     if callback:
         callback(loglike, params)
     # M step
     return list(map(normalize, E_stats))
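
EM_update relies on the identity that the gradient of a log partition function with respect to the natural parameters equals the expected sufficient statistics, which is what makes the E step a single vgrad call. Below is a minimal sketch of that identity on a toy mixture model; the concrete log_partition_function, data, and shapes are assumptions for illustration only (older autograd versions expose logsumexp in autograd.scipy.misc rather than autograd.scipy.special).

import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import value_and_grad as vgrad
from autograd.scipy.special import logsumexp

def log_partition_function(natural_params, data):
    # natural_params: log mixing weights; data: per-datapoint component log-likelihoods.
    # Returns the log marginal likelihood of the data.
    return np.sum(logsumexp(natural_params + data, axis=1))

loglike, E_stats = vgrad(log_partition_function)(np.log(np.ones(3) / 3.),
                                                 npr.randn(10, 3))
print(E_stats / E_stats.sum())   # normalized expected counts, i.e. the M-step mixing weights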
Example #3
 def gradfun(params, i):
     pgm_params, loglike_params, recogn_params = params
     def objective(var_params):
         loglike_params, recogn_params = var_params
         return -mc_elbo(pgm_params, loglike_params, recogn_params, i)
     val, (loglike_grad, recogn_grad) = vgrad(objective)((loglike_params, recogn_params))
     pgm_natgrad = -natgrad_scale / num_datapoints * \
         (flat(pgm_prior) + num_batches*flat(saved.stats) - flat(pgm_params))
     grad = unflat(pgm_natgrad), loglike_grad, recogn_grad
     if callback: callback(i, val, params, grad)
     return grad
Example #4
 def gradfun(params, i):
     pgm_params, loglike_params, recogn_params = params
     def objective(var_params):
         loglike_params, recogn_params = var_params
         return -mc_elbo(pgm_params, loglike_params, recogn_params, i)
     val, (loglike_grad, recogn_grad) = vgrad(objective)((loglike_params, recogn_params))
     # this expression for pgm_natgrad drops a term that can be computed using
     # the function autograd.misc.fixed_points.fixed_point
     pgm_natgrad = -natgrad_scale / num_datapoints * \
         (flat(pgm_prior) + num_batches*flat(saved.stats) - flat(pgm_params))
     grad = unflat(pgm_natgrad), loglike_grad, recogn_grad
     if callback: callback(i, val, params, grad)
     return grad
Example #5
def rmsprop(data, paramvec, loss, batch_size, rate,
            epochs=1, rho=0.9, epsilon=1e-6, callback=None):
    sumsq = np.zeros_like(paramvec)
    vals = []

    for epoch in range(epochs):
        for minibatch in make_batches(batch_size, data):
            val, g = vgrad(loss)(paramvec, *minibatch)
            sumsq = rho*sumsq + (1.-rho)*g**2
            paramvec = paramvec - rate * g / np.sqrt(sumsq + epsilon)
            vals.append(val)
        if callback: callback(epoch, paramvec, vals)
    return paramvec
Example #6
def adadelta(paramvec, loss, batches, epochs=1, rho=0.95, epsilon=1e-6, callback=None):
    sum_gsq = np.zeros_like(paramvec)
    sum_usq = np.zeros_like(paramvec)
    vals = []

    for epoch in range(epochs):
        permuted_batches = [batches[i] for i in npr.permutation(len(batches))]
        for im, angle in permuted_batches:
            val, g = vgrad(loss)(paramvec, im, angle)
            sum_gsq = rho*sum_gsq + (1.-rho)*g**2                              # running average of squared gradients
            ud = -np.sqrt(sum_usq + epsilon) / np.sqrt(sum_gsq + epsilon) * g  # update: RMS of past updates over RMS of past gradients
            sum_usq = rho*sum_usq + (1.-rho)*ud**2                             # running average of squared updates
            paramvec = paramvec + ud
            vals.append(val)
        if callback: callback(epoch, paramvec, vals, permuted_batches)
    return paramvec
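
The adadelta example assumes npr is autograd.numpy.random and that batches is a list of (image, angle) pairs consumed by loss. A toy invocation under those assumptions (the data and loss here are made up for illustration, with vgrad taken to be autograd's value_and_grad as above):

import autograd.numpy as np
import autograd.numpy.random as npr

batches = [(npr.randn(5, 4), npr.randn(5)) for _ in range(20)]
loss = lambda w, im, angle: np.mean((np.dot(im, w) - angle)**2)
w_fit = adadelta(np.zeros(4), loss, batches, epochs=3)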
Example #7
def adam(data, paramvec, loss, batch_size, rate,
         epochs=1, b1=0.9, b2=0.999, epsilon=1e-8, callback=None):
    m = np.zeros_like(paramvec)
    v = np.zeros_like(paramvec)
    vals = []
    i = 0

    for epoch in range(epochs):
        for minibatch in make_batches(batch_size, data):
            val, g = vgrad(loss)(paramvec, *minibatch)
            m = (1. - b1)*g    + b1*m
            v = (1. - b2)*g**2 + b2*v
            mhat = m / (1 - b1**(i+1))
            vhat = v / (1 - b2**(i+1))
            paramvec -= rate * mhat / (np.sqrt(vhat) + epsilon)
            vals.append(val)
            i += 1
        if callback: callback(epoch, paramvec, vals)
    return paramvec
Example #8
def rmsprop(data,
            paramvec,
            loss,
            batch_size,
            rate,
            epochs=1,
            rho=0.9,
            epsilon=1e-6,
            callback=None):
    sumsq = np.zeros_like(paramvec)
    vals = []

    for epoch in range(epochs):
        for minibatch in make_batches(batch_size, data):
            val, g = vgrad(loss)(paramvec, *minibatch)
            sumsq = rho * sumsq + (1. - rho) * g**2
            paramvec = paramvec - rate * g / np.sqrt(sumsq + epsilon)
            vals.append(val)
        if callback: callback(epoch, paramvec, vals)
    return paramvec
Example #9
def adadelta(paramvec,
             loss,
             batches,
             epochs=1,
             rho=0.95,
             epsilon=1e-6,
             callback=None):
    sum_gsq = np.zeros_like(paramvec)
    sum_usq = np.zeros_like(paramvec)
    vals = []

    for epoch in range(epochs):
        permuted_batches = [batches[i] for i in npr.permutation(len(batches))]
        for im, angle in permuted_batches:
            val, g = vgrad(loss)(paramvec, im, angle)
            sum_gsq = rho * sum_gsq + (1. - rho) * g**2
            ud = -np.sqrt(sum_usq + epsilon) / np.sqrt(sum_gsq + epsilon) * g
            sum_usq = rho * sum_usq + (1. - rho) * ud**2
            paramvec = paramvec + ud
            vals.append(val)
        if callback: callback(epoch, paramvec, vals, permuted_batches)
    return paramvec
Example #10
            pair_params, node_params, alphal, betal,
            np.zeros_like(node_params), np.zeros_like(pair_params))

    expected_stats = expected_states[0], expected_transcounts, expected_states

    return log_normalizer, expected_stats


def hmm_logZ_python(natparam):
    init_params, pair_params, node_params = natparam

    log_alpha = init_params + node_params[0]
    for node_param in node_params[1:]:
        log_alpha = logsumexp(log_alpha[:,None] + pair_params, axis=0) + node_param

    return logsumexp(log_alpha)


def hmm_viterbi(natparam):
    init_params, pair_params, node_params = natparam
    T = node_params.shape[0]

    C = lambda x: np.require(x, requirements='C')
    pair_params, node_params, init_params = \
        C(np.exp(pair_params)), C(node_params), C(np.exp(init_params))

    return _viterbi(pair_params, node_params, init_params,
                    np.zeros(T, dtype=np.int32))

hmm_estep_slow = vgrad(hmm_logZ)
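
A toy call to hmm_logZ_python above, assuming logsumexp comes from autograd.scipy.special and that the natural parameters are a triple of initial, pairwise, and per-timestep node log potentials, with shapes inferred from how the function indexes them:

import autograd.numpy as np
import autograd.numpy.random as npr
from autograd.scipy.special import logsumexp

K, T = 3, 10                      # number of hidden states, sequence length
natparam = (npr.randn(K),         # init_params
            npr.randn(K, K),      # pair_params
            npr.randn(T, K))      # node_params
print(hmm_logZ_python(natparam))  # log normalizer of the chain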
Example #11
 def EM_update(params):
     natural_params = list(map(np.log, params))
     loglike, E_stats = vgrad(log_partition_function)(natural_params, data)  # E step
     if callback: callback(loglike, params)
     return list(map(normalize, E_stats))                                    # M step
Example #12
 def gradfun(y_n, N, L, eta, phi, psi):
     def objective(var_params):
         phi, psi = var_params
         return mc_vlb(eta, phi, psi, y_n, N, L)
     vlb, (phi_grad, psi_grad) = vgrad(objective)((phi, psi))
     eta_natgrad = sub(add(eta_prior, saved.stats), eta)
     return vlb, (eta_natgrad, phi_grad, psi_grad)
Example #13
 def gradfun(y_n, N, L, eta, phi, psi):
     def objective(var_params):
         phi, psi = var_params
         return mc_vlb(eta, phi, psi, y_n, N, L)
     vlb, (phi_grad, psi_grad) = vgrad(objective)((phi, psi))
     eta_natgrad = sub(add(eta_prior, saved.stats), eta)
     return vlb, (eta_natgrad, phi_grad, psi_grad)
Example #14
 def flat_gradfun(y_n, N, L, eta, phi, psi):
     def objective(all_params):
         eta, phi, psi = all_params
         return mc_vlb(eta, phi, psi, y_n, N, L)
     return vgrad(objective)((eta, phi, psi))
Example #15
 def flat_gradfun(y_n, N, L, eta, phi, psi):
     def objective(all_params):
         eta, phi, psi = all_params
         return mc_vlb(eta, phi, psi, y_n, N, L)
     return vgrad(objective)((eta, phi, psi))