def adam(data, paramvec, loss, batch_size, rate,
         epochs=1, b1=0.9, b2=0.999, epsilon=1e-8, callback=None):
    m = np.zeros_like(paramvec)
    v = np.zeros_like(paramvec)
    vals = []

    i = 0
    for epoch in range(epochs):
        for minibatch in make_batches(batch_size, data):
            val, g = vgrad(loss)(paramvec, *minibatch)
            m = (1. - b1) * g    + b1 * m
            v = (1. - b2) * g**2 + b2 * v
            mhat = m / (1 - b1**(i + 1))
            vhat = v / (1 - b2**(i + 1))
            paramvec -= rate * mhat / (np.sqrt(vhat) + epsilon)
            vals.append(val)
            i += 1
        if callback: callback(epoch, paramvec, vals)

    return paramvec
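# A minimal sketch of the helpers the optimizers in this file assume:
# `vgrad` is taken to be autograd's value_and_grad, and `make_batches`
# yields minibatch tuples that are splatted into the loss. The quadratic
# loss and random data below are hypothetical, for illustration only.
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import value_and_grad as vgrad

def make_batches(batch_size, data):
    X, y = data
    for start in range(0, X.shape[0], batch_size):
        yield X[start:start+batch_size], y[start:start+batch_size]

# hypothetical usage on a least-squares problem
X, w_true = npr.randn(100, 5), npr.randn(5)
y = X.dot(w_true)
loss = lambda w, Xbatch, ybatch: np.mean((Xbatch.dot(w) - ybatch)**2)
w = adam((X, y), np.zeros(5), loss, batch_size=10, rate=0.05, epochs=100)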
def EM_update(params):
    natural_params = list(map(np.log, params))
    loglike, E_stats = vgrad(log_partition_function)(natural_params, data)  # E step
    if callback: callback(loglike, params)
    return list(map(normalize, E_stats))  # M step
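# A self-contained illustration of the autodiff-EM trick above, using a
# discrete mixture model: the gradient of the marginal log likelihood with
# respect to the natural parameters gives the expected sufficient
# statistics, so a single vgrad call is the E step and normalizing is the
# M step. All definitions here are hypothetical stand-ins for the names
# EM_update closes over.
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import value_and_grad as vgrad
from autograd.scipy.special import logsumexp

normalize = lambda a: a / a.sum(-1, keepdims=True)
callback = None

def log_partition_function(natural_params, data):
    log_pi, log_lik = natural_params  # log mixture weights, log likelihood table
    return np.sum(logsumexp(log_pi + log_lik[:, data].T, axis=1))

data = npr.randint(5, size=200)
params = [normalize(npr.rand(3)), normalize(npr.rand(3, 5))]
for _ in range(25):
    params = EM_update(params)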
def gradfun(params, i):
    pgm_params, loglike_params, recogn_params = params
    objective = lambda rest: -mc_elbo(pgm_params, *rest, i)
    val, (loglike_grad, recogn_grad) = vgrad(objective)((loglike_params, recogn_params))
    # this expression for pgm_natgrad drops a term that can be computed using
    # the function autograd.misc.fixed_points.fixed_point
    pgm_natgrad = -natgrad_scale / num_datapoints * \
        (flat(pgm_prior) + num_batches*flat(saved.stats) - flat(pgm_params))
    grad = unflat(pgm_natgrad), loglike_grad, recogn_grad
    if callback: callback(i, val, params, grad)
    return grad
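# A sketch of the `flat`/`unflat` pair gradfun assumes, built on
# autograd.misc.flatten, which maps any nested container of arrays to a
# single flat vector and returns the inverse map alongside it. The example
# parameter structure below is hypothetical.
from autograd.misc.flatten import flatten
import autograd.numpy as np

flat = lambda struct: flatten(struct)[0]

example_pgm_params = (np.ones(3), (np.eye(2), np.zeros(4)))  # hypothetical
unflat = flatten(example_pgm_params)[1]  # inverse map for that structure

assert np.allclose(flat(example_pgm_params),
                   flat(unflat(flat(example_pgm_params))))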
def rmsprop(data, paramvec, loss, batch_size, rate,
            epochs=1, rho=0.9, epsilon=1e-6, callback=None):
    sumsq = np.zeros_like(paramvec)
    vals = []

    for epoch in range(epochs):
        for minibatch in make_batches(batch_size, data):
            val, g = vgrad(loss)(paramvec, *minibatch)
            sumsq = rho*sumsq + (1.-rho)*g**2
            paramvec = paramvec - rate * g / np.sqrt(sumsq + epsilon)
            vals.append(val)
        if callback: callback(epoch, paramvec, vals)

    return paramvec
def adadelta(paramvec, loss, batches,
             epochs=1, rho=0.95, epsilon=1e-6, callback=None):
    sum_gsq = np.zeros_like(paramvec)
    sum_usq = np.zeros_like(paramvec)
    vals = []

    for epoch in range(epochs):
        permuted_batches = [batches[i] for i in npr.permutation(len(batches))]
        for im, angle in permuted_batches:
            val, g = vgrad(loss)(paramvec, im, angle)
            sum_gsq = rho*sum_gsq + (1.-rho)*g**2
            ud = -np.sqrt(sum_usq + epsilon) / np.sqrt(sum_gsq + epsilon) * g
            sum_usq = rho*sum_usq + (1.-rho)*ud**2
            paramvec = paramvec + ud
            vals.append(val)
        if callback: callback(epoch, paramvec, vals, permuted_batches)

    return paramvec
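# Hypothetical usage of adadelta above: unlike adam and rmsprop, it expects
# `batches` as an explicit list of (image, angle) pairs matching the loss
# signature. The linear model and random data are illustrative only.
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import value_and_grad as vgrad

ims, angles = npr.randn(20, 8), npr.rand(20)
batches = [(ims[i:i+5], angles[i:i+5]) for i in range(0, 20, 5)]
loss = lambda w, im, angle: np.mean((im.dot(w) - angle)**2)
w = adadelta(np.zeros(8), loss, batches, epochs=10)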
        pair_params, node_params, alphal, betal,
        np.zeros_like(node_params), np.zeros_like(pair_params))
    expected_stats = expected_states[0], expected_transcounts, expected_states
    return log_normalizer, expected_stats

def hmm_logZ_python(natparam):
    init_params, pair_params, node_params = natparam

    log_alpha = init_params + node_params[0]
    for node_param in node_params[1:]:
        log_alpha = logsumexp(log_alpha[:,None] + pair_params, axis=0) + node_param

    return logsumexp(log_alpha)

def hmm_viterbi(natparam):
    init_params, pair_params, node_params = natparam
    T = node_params.shape[0]

    C = lambda x: np.require(x, requirements='C')
    pair_params, node_params, init_params = \
        C(np.exp(pair_params)), C(node_params), C(np.exp(init_params))

    return _viterbi(pair_params, node_params, init_params,
                    np.zeros(T, dtype=np.int32))

hmm_estep_slow = vgrad(hmm_logZ)
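# A usage sketch of the pure-Python HMM code above: differentiating the log
# partition function with respect to the natural parameters yields the
# expected statistics, which is exactly what hmm_estep_slow does. The state
# count and sequence length below are arbitrary.
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import value_and_grad as vgrad
from autograd.scipy.special import logsumexp

K, T = 3, 10
natparam = (np.log(npr.dirichlet(np.ones(K))),          # init_params
            np.log(npr.dirichlet(np.ones(K), size=K)),  # pair_params
            npr.randn(T, K))                            # node_params
logZ, expected_stats = vgrad(hmm_logZ_python)(natparam)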
def gradfun(y_n, N, L, eta, phi, psi):
    objective = lambda rest: mc_vlb(eta, *rest, y_n, N, L)
    vlb, (phi_grad, psi_grad) = vgrad(objective)((phi, psi))
    eta_natgrad = sub(add(eta_prior, saved.stats), eta)
    return vlb, (eta_natgrad, phi_grad, psi_grad)
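# A sketch of the `add`/`sub` helpers gradfun uses: elementwise arithmetic
# over (possibly nested) tuples of arrays, so natural-parameter structures
# can be combined without flattening. Assumed implementation, not the
# original one.
import operator

def _treemap2(op):
    def go(a, b):
        if isinstance(a, (tuple, list)):
            return type(a)(go(x, y) for x, y in zip(a, b))
        return op(a, b)
    return go

add, sub = _treemap2(operator.add), _treemap2(operator.sub)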
def flat_gradfun(y_n, N, L, eta, phi, psi):
    objective = lambda all_params: mc_vlb(*all_params, y_n, N, L)
    return vgrad(objective)((eta, phi, psi))
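# A sketch of the `saved` stash referenced in the gradfuns above, assuming
# it is a mutable namespace the ELBO computation writes expected statistics
# into on the forward pass, so the natural-gradient expressions can read
# them back afterwards.
from types import SimpleNamespace

saved = SimpleNamespace(stats=None)  # populated inside mc_vlb / mc_elbo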