def adam_minimax(grad_both, init_params_max, init_params_min, callback=None,
                 num_iters=100, step_size_max=0.001, step_size_min=0.001,
                 b1=0.9, b2=0.999, eps=10**-8):
    """Adam modified to do minimax optimization, for instance to help with
    training generative adversarial networks.

    grad_both(params_max, params_min, i) must return a pair of gradient
    trees: the gradient w.r.t. the maximizing parameters and the gradient
    w.r.t. the minimizing parameters.  The maximizing player takes an
    ascent step and the minimizing player a descent step, each with its
    own Adam moment estimates.  Returns the final (params_max, params_min)
    in their original (unflattened) structure.
    """
    # Work on flat vectors; unflatten_* maps back to the original trees.
    x_max, unflatten_max = flatten(init_params_max)
    x_min, unflatten_min = flatten(init_params_min)
    # First/second moment accumulators, one pair per player.
    m_max = np.zeros(len(x_max))
    v_max = np.zeros(len(x_max))
    m_min = np.zeros(len(x_min))
    v_min = np.zeros(len(x_min))
    for i in range(num_iters):
        g_max_uf, g_min_uf = grad_both(unflatten_max(x_max),
                                       unflatten_min(x_min), i)
        g_max, _ = flatten(g_max_uf)
        g_min, _ = flatten(g_min_uf)

        if callback:
            callback(unflatten_max(x_max), unflatten_min(x_min), i,
                     unflatten_max(g_max), unflatten_min(g_min))

        m_max = (1 - b1) * g_max + b1 * m_max  # First moment estimate.
        v_max = (1 - b2) * (g_max**2) + b2 * v_max  # Second moment estimate.
        mhat_max = m_max / (1 - b1**(i + 1))  # Bias correction.
        vhat_max = v_max / (1 - b2**(i + 1))
        # Ascent step for the maximizing player.
        x_max = x_max + step_size_max * mhat_max / (np.sqrt(vhat_max) + eps)

        m_min = (1 - b1) * g_min + b1 * m_min  # First moment estimate.
        v_min = (1 - b2) * (g_min**2) + b2 * v_min  # Second moment estimate.
        mhat_min = m_min / (1 - b1**(i + 1))  # Bias correction.
        vhat_min = v_min / (1 - b2**(i + 1))
        # Descent step for the minimizing player.
        x_min = x_min - step_size_min * mhat_min / (np.sqrt(vhat_min) + eps)
    return unflatten_max(x_max), unflatten_min(x_min)
def test_flatten_empty():
    """A nested container that includes an empty tuple should round-trip."""
    original = (npr.randn(4), [npr.randn(3, 4), 2.5], (),
                (2.0, [1.0, npr.randn(2)]))
    flat, unflatten = flatten(original)
    rebuilt = unflatten(flat)
    flat_again, _ = flatten(rebuilt)
    assert np.all(flat == flat_again)
def _optimize(grad, x0, gargs, callback=None, *args, **kwargs):
    """Run `optimize` over flattened parameters and unflatten the result."""
    flat_x0, unflatten = flatten(x0)

    def flat_grad(x, i):
        return flatten(grad(unflatten(x), i, gargs))[0]

    if callback:
        flat_callback = lambda x, i, g: callback(unflatten(x), i,
                                                 unflatten(g), gargs)
    else:
        flat_callback = None
    return unflatten(optimize(flat_grad, flat_x0, gargs, flat_callback,
                              *args, **kwargs))
def flatmap(f, container):
    """Map `f` over `container` and concatenate the results.

    Dispatch is on the container's exact type:
      * np.ndarray -> `f` is applied to the whole array and its result returned,
      * list       -> `f` maps over the items; each result (a list) is concatenated,
      * dict       -> `f` maps over the values; results are concatenated.
    Any other type raises KeyError, matching the original table lookup.
    """
    def _concat(lists):
        return [item for sub in lists for item in sub]

    if type(container) is np.ndarray:
        return f(container)
    if type(container) is list:
        return _concat(map(f, container))
    if type(container) is dict:
        return _concat(map(f, container.values()))
    raise KeyError(type(container))
def _optimize(grad, x0, imgsize, callback=None, *args, **kwargs):
    """Wrapper around `optimize` that works on flattened parameters."""
    print('Optimzers: 21 = ', x0)
    flat_x0, unflatten = flatten(x0)

    def flat_grad(x, i):
        return flatten(grad(unflatten(x), i))[0]

    if callback:
        flat_callback = lambda x, i, g: callback(unflatten(x), i, unflatten(g))
    else:
        flat_callback = None
    return unflatten(
        optimize(flat_grad, flat_x0, imgsize, flat_callback, *args, **kwargs))
def test_flatten_dict():
    """A dict with array, scalar and list values should round-trip."""
    original = {'k': npr.random((4, 4)),
                'k2': npr.random((3, 3)),
                'k3': 3.0,
                'k4': [1.0, 4.0, 7.0, 9.0]}
    flat, unflatten = flatten(original)
    reflat, _ = flatten(unflatten(flat))
    assert np.all(flat == reflat)
def _optimize(fun, grad, x0, callback=None, *args, **kwargs):
    """Run `optimize` on flattened parameters; return (best params, best value)."""
    flat_x0, unflatten = flatten(x0)

    def flat_fun(x):
        return flatten(fun(unflatten(x)))[0]

    def flat_grad(x):
        return flatten(grad(unflatten(x)))[0]

    flat_callback = (lambda x: callback(unflatten(x))) if callback else None
    result = optimize(flat_fun, flat_grad, flat_x0, flat_callback,
                      *args, **kwargs)
    return unflatten(result.x), result.fun
def convex_combination(curr, target, alpha):
    """
    Output next = (1-alpha) * target + alpha * curr
    where target, curr, and next can be trees of nested
    containers with arrays/scalars at the leaves.
    Assume curr and target have the same structure.
    """
    assert 0 <= alpha <= 1
    flat_curr, unflatten = flatten(curr)
    flat_target, _ = flatten(target)
    return unflatten(alpha * flat_curr + (1 - alpha) * flat_target)
def gd_step(cost, params, lrate):
    """Perform one gradient descent step on the given cost function with
    learning rate lrate. Returns a new set of parameters, and (IMPORTANT)
    does not modify the input parameters."""
    cost_grad = ag.grad(cost)        # gradient function of the cost
    param_grads = cost_grad(params)  # gradient tree, same structure as params
    flat_params, unflatten_func = flatten(params)
    # The gradient unflattener is unnecessary: params and their gradients
    # share one structure, so the previously-bound `unflatten_func2` was
    # dead code.  Also dropped the leftover "YOUR CODE HERE" scaffold.
    flat_grads, _ = flatten(param_grads)
    return unflatten_func(flat_params - flat_grads * lrate)
def test_flatten():
    """Mixed tuple/list/dict structures flatten to the expected length and round-trip."""
    r = np.random.randn
    nested = (1.0, r(2, 3), [r(1, 4), {'x': 2.0, 'y': r(4, 2)}])
    flat, unflatten = flatten(nested)
    assert flat.shape == (20,)
    assert flat[0] == 1.0
    assert np.all(flat == flatten(unflatten(flat))[0])

    scalars = (1.0, 2.0, [3.0, {'x': 2.0, 'y': 4.0}])
    flat_s, unflatten_s = flatten(scalars)
    assert flat_s.shape == (5,)
    assert scalars == unflatten_s(flat_s)
def test_flatten_dict():
    """Flatten/unflatten a heterogeneous dict and compare flat vectors."""
    data = {
        'k': npr.random((4, 4)),
        'k2': npr.random((3, 3)),
        'k3': 3.0,
        'k4': [1.0, 4.0, 7.0, 9.0],
    }
    vec, rebuild = flatten(data)
    assert np.all(vec == flatten(rebuild(vec))[0])
def test_flatten():
    """Flattening mixed containers gives the expected vector; scalars round-trip exactly."""
    rand = np.random.randn
    mixed = (1.0, rand(2, 3), [rand(1, 4), {'x': 2.0, 'y': rand(4, 2)}])
    vec, rebuild = flatten(mixed)
    assert vec.shape == (20,)
    assert vec[0] == 1.0
    assert np.all(vec == flatten(rebuild(vec))[0])

    pure_scalars = (1.0, 2.0, [3.0, {'x': 2.0, 'y': 4.0}])
    svec, srebuild = flatten(pure_scalars)
    assert svec.shape == (5,)
    assert pure_scalars == srebuild(svec)
def adam_minimax(grad_both, init_params_max, init_params_min, callback=None,
                 num_iters=100, max_iters=1, n_critic=5,
                 step_size_max=0.0001, step_size_min=0.0001,
                 b1=0.0, b2=0.9, eps=10**-8):
    """Adam modified to do minimax optimization, for instance to help with
    training generative adversarial networks.

    Per outer iteration, the gradient pair is evaluated n_critic times
    (only the final evaluation feeds the updates) and the Adam update is
    applied max_iters times.

    Bug fix: the inner update loop previously used ``i`` as its loop
    variable, shadowing the outer iteration counter.  With the default
    ``max_iters=1`` the bias-correction denominators were frozen at
    ``1 - b1`` / ``1 - b2`` for the whole run instead of decaying with the
    step count.  The inner loop variable is renamed so the bias correction
    follows the outer iteration ``i``.
    """
    x_max, unflatten_max = flatten(init_params_max)
    x_min, unflatten_min = flatten(init_params_min)
    m_max = np.zeros(len(x_max))
    v_max = np.zeros(len(x_max))
    m_min = np.zeros(len(x_min))
    v_min = np.zeros(len(x_min))
    for i in range(num_iters):
        # NOTE(review): each pass overwrites the previous gradients; only
        # the last (g_max, g_min) of this loop is used below -- confirm
        # this matches the intended critic schedule.
        for t in range(n_critic):
            g_max_uf, g_min_uf = grad_both(unflatten_max(x_max),
                                           unflatten_min(x_min), i)
            g_max, _ = flatten(g_max_uf)
            g_min, _ = flatten(g_min_uf)

        if callback:
            callback(unflatten_max(x_max), unflatten_min(x_min), i,
                     unflatten_max(g_max), unflatten_min(g_min))

        for _ in range(max_iters):  # renamed from ``i`` to avoid shadowing
            m_max = (1 - b1) * g_max + b1 * m_max  # First moment estimate.
            v_max = (1 - b2) * (g_max**2) + b2 * v_max  # Second moment estimate.
            mhat_max = m_max / (1 - b1**(i + 1))  # Bias correction.
            vhat_max = v_max / (1 - b2**(i + 1))
            x_max = x_max + step_size_max * mhat_max / (np.sqrt(vhat_max) + eps)

            m_min = (1 - b1) * g_min + b1 * m_min  # First moment estimate.
            v_min = (1 - b2) * (g_min**2) + b2 * v_min  # Second moment estimate.
            mhat_min = m_min / (1 - b1**(i + 1))  # Bias correction.
            vhat_min = v_min / (1 - b2**(i + 1))
            x_min = x_min - step_size_min * mhat_min / (np.sqrt(vhat_min) + eps)
    return unflatten_max(x_max), unflatten_min(x_min)
def adam_maximin(grad_both, init_params_max, init_params_min, callback=None,
                 num_iters=100, step_size_max=0.001, step_size_min=0.001,
                 b1=0.9, b2=0.999, eps=10**-8):
    """Adam modified to do minimax optimization, for instance to help with
    training generative adversarial networks.

    Variant that (a) trains only one entry of each params container --
    index 0 when init_params_max[3] flags subspace training, index 2
    otherwise -- and (b) updates the minimizing player before the
    maximizing one on every iteration.
    """
    # init_params_max[3] selects which entry holds the trainable params.
    subspace_training = init_params_max[3]
    if subspace_training:
        trainable_param_idx = 0
    else:
        trainable_param_idx = 2
    x_max, unflatten_max = flatten(
        init_params_max[trainable_param_idx])  # Pick and flatten the trainable params
    x_min, unflatten_min = flatten(init_params_min[trainable_param_idx])
    # First/second moment accumulators for each player.
    m_max = np.zeros(len(x_max))
    v_max = np.zeros(len(x_max))
    m_min = np.zeros(len(x_min))
    v_min = np.zeros(len(x_min))
    for i in range(num_iters):
        g_max_uf, g_min_uf = grad_both(unflatten_max(x_max),
                                       unflatten_min(x_min), i)
        g_max, _ = flatten(g_max_uf)
        g_min, _ = flatten(g_min_uf)

        if callback:
            callback(unflatten_max(x_max), unflatten_min(x_min),
                     init_params_max, init_params_min, i)

        # Minimizing player first: Adam descent step.
        m_min = (1 - b1) * g_min + b1 * m_min  # First moment estimate.
        v_min = (1 - b2) * (g_min**2) + b2 * v_min  # Second moment estimate.
        mhat_min = m_min / (1 - b1**(i + 1))  # Bias correction.
        vhat_min = v_min / (1 - b2**(i + 1))
        x_min = x_min - step_size_min * mhat_min / (np.sqrt(vhat_min) + eps)

        # Maximizing player second: Adam ascent step.
        m_max = (1 - b1) * g_max + b1 * m_max  # First moment estimate.
        v_max = (1 - b2) * (g_max**2) + b2 * v_max  # Second moment estimate.
        mhat_max = m_max / (1 - b1**(i + 1))  # Bias correction.
        vhat_max = v_max / (1 - b2**(i + 1))
        x_max = x_max + step_size_max * mhat_max / (np.sqrt(vhat_max) + eps)
    return unflatten_max(x_max), unflatten_min(x_min)
def taylor_expansion(self, b, u):
    """Linearize the EKF belief dynamics around (mean, covariance, control)
    at every time step and store the Jacobian blocks.

    For each step t the tuple (b.mu[..., t], b.sigma[..., t], u[..., t]) is
    flattened, the Jacobian of the flattened EKF map is computed, and
    row/column blocks of that Jacobian are copied into the per-step
    sensitivity tensors F, G, X, Y, Z, T, U, V.

    NOTE(review): the row partition assumes self.ekf's flattened output is
    laid out as [mean (nb_bdim) | covariance (nb_bdim**2) | remainder], and
    the columns as [mean | covariance | control (nb_udim)] -- confirm
    against self.ekf.
    """
    for t in range(self.nb_steps):
        # Pack the linearization point for step t into one flat vector.
        _in = tuple([b.mu[..., t], b.sigma[..., t], u[..., t]])
        _in_flat, _unflatten = flatten(_in)

        def _ekf_flat(_in_flat):
            # EKF step expressed purely on the flat vector, so jacobian()
            # returns one 2-D array we can slice into blocks.
            return flatten(self.ekf(*_unflatten(_in_flat)))[0]

        _ekf_jac = jacobian(_ekf_flat)
        _grads = _ekf_jac(_in_flat)

        # Mean-output rows: sensitivity to mean and to control.
        self.F[..., t] = _grads[:self.nb_bdim, :self.nb_bdim]
        self.G[..., t] = _grads[:self.nb_bdim, -self.nb_udim:]

        # Covariance-output rows (the next nb_bdim**2 rows).
        self.X[..., t] = _grads[self.nb_bdim:self.nb_bdim +
                                self.nb_bdim * self.nb_bdim, :self.nb_bdim]
        self.Y[..., t] = _grads[self.nb_bdim:self.nb_bdim +
                                self.nb_bdim * self.nb_bdim,
                                self.nb_bdim:self.nb_bdim +
                                self.nb_bdim * self.nb_bdim]
        self.Z[..., t] = _grads[self.nb_bdim:self.nb_bdim +
                                self.nb_bdim * self.nb_bdim, -self.nb_udim:]

        # Remaining rows of the flattened EKF output.
        self.T[..., t] = _grads[self.nb_bdim +
                                self.nb_bdim * self.nb_bdim:, :self.nb_bdim]
        self.U[..., t] = _grads[self.nb_bdim + self.nb_bdim * self.nb_bdim:,
                                self.nb_bdim:self.nb_bdim +
                                self.nb_bdim * self.nb_bdim]
        self.V[..., t] = _grads[self.nb_bdim + self.nb_bdim * self.nb_bdim:,
                                -self.nb_udim:]
def _generic_minimize(method, loss, x0, verbose=False, nb_iter=1000,
                      full_output=False):
    """Minimize `loss` with scipy.optimize.minimize over flattened parameters."""
    flat_x0, unflatten = flatten(x0)

    def _objective(x_flat, itr):
        return loss(unflatten(x_flat), itr)

    if verbose:
        print("Fitting with {}.".format(method))

    # Per-iteration progress counter for the verbose callback.
    count = [0]

    def callback(x_flat):
        count[0] += 1
        print("Iteration {} loss: {:.3f}".format(count[0],
                                                 loss(unflatten(x_flat), -1)))

    # Call the optimizer.
    # HACK: Pass in -1 as the iteration.
    result = minimize(_objective, flat_x0, args=(-1, ), jac=grad(_objective),
                      method=method,
                      callback=callback if verbose else None,
                      options=dict(maxiter=nb_iter, disp=verbose))

    return (unflatten(result.x), result) if full_output else unflatten(result.x)
def objective_train(self, params, t):
    """Scalar training objective: squared error of the mean of 10 stochastic
    forward passes on the training set, plus confidence terms built from the
    per-point spread of those passes, plus an L2 penalty on all parameters.

    params: (pars, d_pars) tuple consumed by forward_pass2;
    t: iteration index (not used in the loss itself).
    """
    pars, d_pars = params  # NOTE(review): unpacked but unused below -- confirm intent.
    # 10 Monte-Carlo forward passes over the training inputs; forward_pass2
    # is presumably stochastic (e.g. sampled weights) -- confirm.
    y_hats = [self.forward_pass2(params, self.train_x) for _ in range(10)]
    y_hats = np.array(y_hats)
    y_hats = np.reshape(y_hats, (10, len(self.train_x)))
    mean = y_hats.mean(axis=0)  # per-point predictive mean
    std = y_hats.std(axis=0)    # per-point predictive spread
    sqerr = np.sum((mean.reshape((len(mean), 1)) - self.train_y)**2)
    # Same Monte-Carlo procedure on the held-out inputs (spread only).
    y_hat_q = [self.forward_pass2(params, self.test_x) for _ in range(10)]
    y_hat_q = np.array(y_hat_q)
    y_hat_q = np.reshape(y_hat_q, (10, len(self.test_x)))
    std_q = y_hat_q.std(axis=0)
    # Squash the spreads into (0, 1).
    sqn = 1 - (1 / (1 + std_q))
    spn = 1 - (1 / (1 + std))
    ce_sqn = np.sum(np.log(1 - sqn))
    ce_spn = np.sum(np.log(spn))
    ce_loss = 1.0 * ce_sqn + 0.1 * ce_spn
    # Weighted sum; the last term is an L2 penalty over the flat params.
    return 5.0 * sqerr + 0.05 * ce_loss + 0.001 * np.sum(
        flatten(params)[0]**2)
def objective_train(self, params, t):
    """Multi-output variant of the training objective: squared error of the
    mean of 10 stochastic forward passes, confidence terms based on the
    generalized variance of those passes, and an L2 penalty on all params.

    params: (pars, d_pars) tuple consumed by forward_pass2;
    t: iteration index (not used in the loss itself).
    """
    pars, d_pars = params  # NOTE(review): unpacked but unused below -- confirm intent.
    n, d = self.train_y.shape
    m = self.test_x.shape[0]
    # 10 Monte-Carlo forward passes over the training inputs.
    y_hats = [self.forward_pass2(params, self.train_x) for _ in range(10)]
    y_hats = np.array(y_hats)
    y_hats = np.reshape(y_hats, (10, n, d))
    mean = y_hats.mean(axis=0)
    # covariance determinant
    std = self.generalized_variance(y_hats)
    sqerr = np.linalg.norm((mean.reshape((n, d)) - self.train_y))**2
    # Same Monte-Carlo procedure on the held-out inputs (spread only).
    y_hat_q = [self.forward_pass2(params, self.test_x) for _ in range(10)]
    y_hat_q = np.array(y_hat_q)
    y_hat_q = np.reshape(y_hat_q, (10, m, d))
    std_q = self.generalized_variance(y_hat_q)
    # Squash the spreads into (0, 1).
    sqn = 1 - (1 / (1 + std_q))
    spn = 1 - (1 / (1 + std))
    ce_sqn = np.sum(np.log(1 - sqn))
    ce_spn = np.sum(np.log(spn))
    ce_loss = 1.0 * ce_sqn + 0.1 * ce_spn
    # Weighted sum; the last term is an L2 penalty over the flat params.
    return 5.0 * sqerr + 0.05 * ce_loss + 0.001 * np.sum(
        flatten(params)[0]**2)
def make_gradfun(run_inference, recognize, loglike, pgm_prior, data,
                 batch_size, num_samples, natgrad_scale=1., callback=callback):
    """Build a gradient function for SVAE-style training: natural gradient
    for the PGM parameters, ordinary gradients for the likelihood and
    recognition network parameters.

    NOTE(review): the default ``callback=callback`` captures a module-level
    name at definition time -- confirm it exists where this is defined.
    NOTE(review): ``flat`` below is a free name (presumably a module-level
    flattening helper) -- confirm.
    """
    _, unflat = flatten(pgm_prior)
    num_datapoints = get_num_datapoints(data)
    data_batches, num_batches = split_into_batches(data, batch_size)
    get_batch = lambda i: data_batches[i % num_batches]
    saved = lambda: None  # cheap mutable namespace for stashing stats

    def mc_elbo(pgm_params, loglike_params, recogn_params, i):
        # Monte-Carlo ELBO estimate on one minibatch, rescaled to the full
        # dataset via num_batches / num_datapoints.
        nn_potentials = recognize(recogn_params, get_batch(i))
        samples, saved.stats, global_kl, local_kl = \
            run_inference(pgm_prior, pgm_params, nn_potentials, num_samples)
        return (num_batches * loglike(loglike_params, samples, get_batch(i))
                - global_kl - num_batches * local_kl) / num_datapoints

    def gradfun(params, i):
        pgm_params, loglike_params, recogn_params = params
        # Ordinary gradients for the neural-net parameters.
        objective = lambda loglike_params, recogn_params: \
            -mc_elbo(pgm_params, loglike_params, recogn_params, i)
        val, (loglike_grad, recogn_grad) = vgrad(objective)(
            (loglike_params, recogn_params))
        # Natural gradient for the PGM parameters from the expected stats
        # saved by run_inference.
        pgm_natgrad = -natgrad_scale / num_datapoints * \
            (flat(pgm_prior) + num_batches*flat(saved.stats) - flat(pgm_params))
        grad = unflat(pgm_natgrad), loglike_grad, recogn_grad

        if callback: callback(i, val, params, grad)
        return grad

    return gradfun
def _generic_minimize(method, loss, x0, verbose=False, num_iters=1000):
    """
    Minimize a given loss function with scipy.optimize.minimize.
    """
    # Progress reporting for verbose mode.
    count = [0]

    def callback(x):
        count[0] += 1
        print("Iteration {} loss: {:.3f}".format(count[0], loss(x, -1)))

    # Flatten the loss
    flat_x0, unflatten = flatten(x0)

    def _objective(x_flat, itr):
        return loss(unflatten(x_flat), itr)

    if verbose:
        print("Fitting with {}.".format(method))

    # Call the optimizer.
    # HACK: Pass in -1 as the iteration.
    result = minimize(_objective, flat_x0, args=(-1, ), jac=grad(_objective),
                      method=method,
                      callback=callback if verbose else None,
                      options=dict(maxiter=num_iters, disp=verbose))

    if verbose:
        print("{} completed with message: \n{}".format(method, result.message))
    if not result.success:
        warn("{} failed with message:\n{}".format(method, result.message))

    return unflatten(result.x)
def optimise(self, params, num_ite=1e5):
    """Fit parameters by minimizing self.objective with L-BFGS-B, then store
    the result, re-orthogonalize the second parameter block, and record the
    run in self.dict.

    NOTE(review): num_ite defaults to the float 1e5 but is passed as
    scipy's integer 'maxiter' option -- confirm it is accepted/coerced.
    """
    t0 = time()
    # NOTE(review): ll0 is computed but never used -- possibly a leftover
    # sanity check; confirm before removing.
    ll0 = self.logevidence(params, self.yknt, self.xt)
    x0, _ = flatten(params)
    method = 'L-BFGS-B'
    # method = 'SLSQP'
    self.res = minimize(self.objective,
                        x0=x0,
                        method=method,
                        jac=self.grad_obj,
                        options={
                            'disp': self.disp,
                            'maxiter': num_ite,
                            'gtol': 1e-7,
                            'ftol': 1e-7
                        },
                        callback=self.callback,
                        bounds=self.bounds)
    self.params = self.unflatten(self.res['x'])
    # Re-orthogonalize the second parameter block after optimization.
    self.params[1] = myqr(self.params[1])[0]
    self.dict['success'] = self.res['success']
    self.dict['nit'] = self.res['nit']
    print('\nsuccess: {0}'.format(self.res['success']))
    print('\nnit: {0}'.format(self.res['nit']))
    print('\ncause: {0}'.format(self.res['message']))
    # Final log-evidence at the optimized parameters.
    self.ll = self.logevidence(self.params, self.yknt, self.xt)
    self.dict['params'] = self.params
    self.dict['ll'].append(self.ll)
    dur = (time() - t0) / 60  # wall-clock minutes
    self.dict['dur'] = dur
    print('optimisation terminated... \nexecution time: {0}'.format(dur))
def disc(dsc_params, dsc_layer_sizes, gen_params, data):
    """Uncomment/comment to determine if the discriminator sees the generator's params."""
    # return neural_net_predict(dsc_params, data)
    num_rows, _ = data.shape
    flat_gen, _ = flatten(gen_params)
    # Append a copy of the flat generator params to every data row.
    augmented = np.hstack([data, np.tile(flat_gen, (num_rows, 1))])
    return neural_net_predict(dsc_params, dsc_layer_sizes, augmented)
def model_loss(params, step):
    """Loss for one minibatch of the two-headed network: task-specific
    cross-entropy-style loss plus a Bhattacharyya-style penalty between the
    mean hidden activations of two input distributions.

    NOTE(review): X_shuf and X_tar_shuf are free variables captured from an
    enclosing scope -- confirm they are defined where this closure is built.
    """
    W, b1, W2_1, b2_1, W2_2, b2_2 = params
    W_norm = W  #batch_normalize(W)
    # W2_1 = batch_normalize(W2_1)
    # W2_2 = batch_normalize(W2_2)
    X, y, task = get_batch(step)
    prod = W_norm @ X + b1
    nonlin = relu(prod)
    # Inverted dropout: zero units with prob dropout_percent, rescale the rest.
    if use_dropout:
        nonlin *= np.random.binomial(
            [np.ones((len(prod), nonlin.shape[1]))],
            1 - self.dropout_percent)[0] * (1.0 / (1 - self.dropout_percent))
    # Task-specific output head.
    if task == 1:
        out = (W2_1 @ nonlin) + b2_1
    else:
        out = (W2_2 @ nonlin) + b2_2
    # Temperature-scaled softmax.
    prob = np.exp(out / self.T) / np.sum(np.exp(out / self.T))
    L = loss(y, prob)
    # task relatedness
    if regularize:
        a_bar = (flatten(self.task_1.W)[0] + flatten(self.task_2.W)[0]) / 2
        a_bar_norm = np.linalg.norm(a_bar, 2)
        source_norm = np.linalg.norm(flatten(self.task_1.W)[0] - a_bar, 2)
        tar_norm = np.linalg.norm(flatten(self.task_2.W)[0] - a_bar, 2)
        reg = a_bar_norm + 0.1 * (source_norm + tar_norm) / 2
    else:
        # NOTE(review): reg is computed but excluded from the return below
        # (the "+ 0.3 * reg" term is commented out) -- confirm intent.
        reg = 0
    # bhattacharya penalty
    P_s_prime = relu(((W_norm @ X_shuf.T) + b1)).T.mean(axis=0)
    P_t_prime = relu(((W_norm @ X_tar_shuf.T) + b1)).T.mean(axis=0)
    P_s = P_s_prime / (np.sum(P_s_prime))
    P_t = P_t_prime / (np.sum(P_t_prime))
    m = np.multiply(P_s, P_t)  # NOTE(review): unused
    bt_distance = -(np.log(np.sum(P_s * P_t)))
    return L + 0.3 * bt_distance  #+ 0.3 * reg
def cat_vae(X, var_size, encoder_params, decoder_params, T, q, s, beta, mcmc):
    """Train a categorical VAE with Adam on random subsamples of X.

    X: data matrix (n, d); T: number of iterations; q: subsample fraction;
    s: Adam step size; beta, mcmc: forwarded to Loss_function.
    Returns [encoder_params, decoder_params] after training.
    """
    def objective(params_tuple):
        # NOTE: closes over `x`, the minibatch assigned inside the loop
        # below, so it must only be called after `x` is set.
        encoder_params, decoder_params = params_tuple
        return -Loss_function(encoder_params, decoder_params, x, var_size,
                              beta, mcmc)

    # calculate nro of parameters
    nro_paramsEnc = nro_params_in_NN(encoder_params)
    nro_paramsDec = nro_params_in_NN(decoder_params)

    # m = q*n, where m is the subsample size used in the stochastic gradient descent
    n, d = X.shape
    m = int(q * len(X))

    # initialize for Adam-optimizer (first/second moments per network)
    me = np.zeros(nro_paramsEnc)
    ve = np.zeros(nro_paramsEnc)
    md = np.zeros(nro_paramsDec)
    vd = np.zeros(nro_paramsDec)

    # monitoring of losses
    losses = []

    for t in range(T):
        objective_grad = grad(objective)
        # Draw a fresh minibatch without replacement.
        subsample_ind = random.sample(range(len(X)), m)
        x = X[subsample_ind]
        grad_enc, grad_dec = objective_grad((encoder_params, decoder_params))
        gradientti_enc, unflatten_enc = flatten(grad_enc)
        gradientti_dec, unflatten_dec = flatten(grad_dec)
        #loss = Loss_function(encoder_params, decoder_params, x,var_size,beta,mcmc) /m
        #losses.append(loss)

        # Update encoder
        update, me, ve = adam(gradientti_enc, me, ve, t, step_size=s)
        encoder_params = addition(encoder_params, unflatten_enc(update))
        # Update decoder
        update, md, vd = adam(gradientti_dec, md, vd, t, step_size=s)
        decoder_params = addition(decoder_params, unflatten_dec(update))
        #if t % 10 == 0:
        #print(losses[t])

    return ([encoder_params, decoder_params])
def __init__(self, configurations, parameters, controls):
    """Set up the control-optimization problem: store the configuration,
    build autograd derivatives of the cost/forward maps, load data, and
    (for grouped populations) assemble the contact matrix."""
    self.configurations = configurations
    self.parameters = parameters
    self.controls = controls
    # Flat view of the controls; self.unflatten rebuilds the tuple.
    flat_args, self.unflatten = flatten(self.controls)

    # Autograd machinery over methods defined elsewhere on this class.
    self.gx = grad(self.cost)
    self.J = jacobian(self.forward)
    self.hx = hessian_vector_product(self.cost)
    self.hvp = hvp(self.hx)

    y0, t_total, N_total, number_group, population_proportion, \
        t_control, number_days_per_control_change, number_control_change_times, number_time_dependent_controls = configurations

    self.N_total = N_total
    self.number_group = number_group
    self.t_control = t_control
    self.dimension = len(self.t_control)
    self.number_time_dependent_controls = number_time_dependent_controls

    self.y0 = y0
    self.t_total = t_total

    self.interpolation = piecewiseLinear

    # NOTE(review): `fips` is not defined in this scope -- presumably a
    # module-level constant identifying the region; confirm.
    self.load_data(fips)
    self.initialization()

    if number_group > 1:
        # contact matrix
        school_closure = True

        # calendar from February 15th
        weekday = [2, 3, 4, 5, 6]
        # calendar from April 1st
        # weekday = [0, 1, 2, 5, 6]
        # calendar from May 1st
        # weekday = [0, 3, 4, 5, 6]
        calendar = np.zeros(1000 + 1, dtype=int)
        # set work days as 1 and school days as 2
        for i in range(1001):
            if np.remainder(i, 7) in weekday:
                calendar[i] = 1
                if not school_closure:  # and i < 45
                    calendar[i] = 2
        self.calendar = calendar

        # Age-structured contact matrices by setting.
        contact = np.load("utils/contact_matrix.npz")
        self.c_home = contact["home"]
        self.c_school = contact["school"]
        self.c_work = contact["work"]
        self.c_other = contact["other"]
        # Weekday-weighted total contact matrix; school term dropped when
        # school_closure is True.
        self.contact_full = self.c_home + 5. / 7 * (
            (1 - school_closure) * self.c_school + self.c_work) + self.c_other
def print_perf_GCN(params, iter, gradient):
    """Every num_batches iterations, report train/test accuracy and the
    squared norm of the gradient."""
    if iter % num_batches != 0:
        return
    train_acc = accuracy_GCN(params, train_images, train_labels)
    test_acc = accuracy_GCN(params, test_images, test_labels)
    print("{:15}|{:20.6}|{:20.6}".format(iter//num_batches,
                                         train_acc, test_acc))
    g_flat, _ = flatten(gradient)
    print('{:1.3e}'.format(np.dot(g_flat, g_flat)))
def _step(value_and_grad, x, itr, state=None, *args, **kwargs):
    """Run one optimizer `step` on the flattened parameters and map the
    updated parameters back to their original structure."""
    flat_x, unflatten = flatten(x)

    def flat_value_and_grad(xf, i):
        value, grads = value_and_grad(unflatten(xf), i)
        return value, flatten(grads)[0]

    next_x, next_val, next_g, next_state = step(
        flat_value_and_grad, flat_x, itr, state=state, *args, **kwargs)
    return unflatten(next_x), next_val, next_g, next_state
def grad_odeint_all(yt, func, y0, t, func_args, **kwargs):
    """
    Extended from "Scalable Inference of Ordinary Differential"
    Equation Models of Biochemical Processes". Sec. 2.4.2
    Fabian Froehlich, Carolin Loos, Jan Hasenauer, 2017
    https://arxiv.org/pdf/1711.08079.pdf

    Returns vjp_all(g), which maps an upstream gradient g (one row per
    observation time) to vector-Jacobian products w.r.t. (func, y0, t,
    func_args) by integrating an augmented ODE backwards between
    consecutive observation times.
    """
    T, D = np.shape(yt)
    flat_args, unflatten = flatten(func_args)

    def flat_func(y, t, flat_args):
        # func with its extra arguments packed into one flat vector.
        return func(y, t, *unflatten(flat_args))

    def unpack(x):
        # y, vjp_y, vjp_t, vjp_args
        return x[0:D], x[D:2 * D], x[2 * D], x[2 * D + 1:]

    def augmented_dynamics(augmented_state, t, flat_args):
        # Original system augmented with vjp_y, vjp_t and vjp_args
        y, vjp_y, _, _ = unpack(augmented_state)
        vjp_all, dy_dt = make_vjp(flat_func, argnum=(0, 1, 2))(y, t, flat_args)
        vjp_y, vjp_t, vjp_args = vjp_all(-vjp_y)
        return np.hstack((dy_dt, vjp_y, vjp_t, vjp_args))

    def vjp_all(g, **kwargs):
        vjp_y = g[-1, :]  # adjoint seeded at the final observation
        vjp_t0 = 0
        time_vjp_list = []
        vjp_args = np.zeros(np.size(flat_args))

        for i in range(T - 1, 0, -1):
            # Compute effect of moving current time.
            vjp_cur_t = np.dot(func(yt[i, :], t[i], *func_args), g[i, :])
            time_vjp_list.append(vjp_cur_t)
            vjp_t0 = vjp_t0 - vjp_cur_t

            # Run augmented system backwards to the previous observation.
            # BUG FIX: the time span must be a sequence; the original
            # np.array(t[i], t[i - 1]) passed t[i - 1] as numpy's dtype
            # argument (cf. the correct sibling implementation).
            aug_y0 = np.hstack((yt[i, :], vjp_y, vjp_t0, vjp_args))
            aug_ans = odeint(augmented_dynamics, aug_y0,
                             np.array([t[i], t[i - 1]]),
                             tuple((flat_args,)), **kwargs)
            _, vjp_y, vjp_t0, vjp_args = unpack(aug_ans[1])

            # Add gradient from current output
            vjp_y = vjp_y + g[i - 1, :]

        time_vjp_list.append(vjp_t0)
        vjp_times = np.hstack(time_vjp_list)[::-1]
        return None, vjp_y, vjp_times, unflatten(vjp_args)

    return vjp_all
def _generic_minimize(method, loss, x0, verbose=False, num_iters=1000,
                      tol=1e-4, state=None, full_output=False,
                      suppress_warnings=False, **kwargs):
    """
    Minimize a given loss function with scipy.optimize.minimize.
    """
    # Optimize over a flat parameter vector; map back on exit.
    flat_x0, unflatten = flatten(x0)

    def _objective(x_flat, itr):
        return loss(unflatten(x_flat), itr)

    if verbose:
        print("Fitting with {}.".format(method))

    # Per-iteration progress report for verbose mode.
    count = [0]

    def callback(x_flat):
        count[0] += 1
        print("Iteration {} loss: {:.3f}".format(count[0],
                                                 loss(unflatten(x_flat), -1)))

    def safe_grad(x, itr):
        # Replace non-finite gradient entries so the optimizer keeps going.
        g = grad(_objective)(x, itr)
        g[~np.isfinite(g)] = 1e8
        return g

    # Call the optimizer. Pass in -1 as the iteration since it is unused.
    result = minimize(_objective, flat_x0, args=(-1, ), jac=safe_grad,
                      method=method,
                      callback=callback if verbose else None,
                      options=dict(maxiter=num_iters, disp=verbose),
                      tol=tol, **kwargs)

    if verbose:
        print("{} completed with message: \n{}".format(method, result.message))
    if not suppress_warnings and not result.success:
        warn("{} failed with message:\n{}".format(method, result.message))

    return (unflatten(result.x), result) if full_output else unflatten(result.x)
def __init__(self, params, predict, inputs, targets):
    """Construct a Model object given a prediction function.

    params: parameter tree; predict: prediction callable stored on the
    instance; inputs, targets: training data held for the loss.
    """
    self.__params = params
    # Flat view of the parameters plus the function to rebuild the tree.
    # NOTE(review): reads self.params (not self.__params) -- presumably a
    # property defined elsewhere on this class; confirm.
    self.__params_flat, self.unflatten_params = flatten(self.params)
    self.predict = predict
    self.inputs = inputs
    self.targets = targets
    # Autograd-derived quantities of self.loss / self.rayleigh_quotient,
    # which are defined elsewhere on this class.
    self.gradient = autograd.grad(self.loss)
    self.hessian = autograd.hessian(self.loss)
    self.hess_dot_vec = autograd.hessian_vector_product(self.loss)
    self.grad_rayleigh = autograd.grad(self.rayleigh_quotient)
def unflatten_tracing():
    """unflatten should be traceable through autograd's make_vjp."""
    nested = [
        npr.randn(4),
        [npr.randn(3, 4), 2.5],
        (),
        (2.0, [1.0, npr.randn(2)]),
    ]
    vect, unflatten = flatten(nested)

    def roundtrip(v):
        return unflatten(v)

    vjp_fun, _ = make_vjp(roundtrip)(vect)
    assert np.all(vect == vjp_fun(nested))
def initialization(self):
    """Re-derive the time grids, control vector and data alignment so that
    the optimization horizon ends at the last observation."""
    # initialize the optimization problem with controls terminate at the end of observation (today)
    y0, t_total, N_total, number_group, population_proportion, \
        t_control, number_days_per_control_change, number_control_change_times, number_time_dependent_controls = self.configurations
    alpha, q, tau, HFR, kappa, beta, delta, sigma, eta_I, eta_Q, mu, gamma_I, gamma_A, gamma_H, gamma_Q = self.controls

    # Forward-simulate once with the current controls to locate day zero.
    t0 = time.time()
    solution = RK2(self.seir, self.y0, self.t_total, self.parameters,
                   self.controls)
    # np.savez(savefilename, t=t, solution=solution, controls=controls)
    print("solve time by RK2 method", time.time() - t0)

    solution_group = self.grouping(solution)
    print("# total infected = ", self.N_total - solution_group[-1, 0],
          "# total death = ", solution_group[-1, 7],
          "maximum # hospitalized = ", np.max(solution_group[:, 5]))

    # First simulated day whose cumulative confirmed count (column 8)
    # reaches the first observed value.
    self.simulation_first_confirmed = np.where(
        solution_group[:, 8] >= self.data_confirmed[0])[0][0]
    # simulation_first_confirmed = np.where(solution_group[:, 8] > data_confirmed[0]-1)[0][0]
    # simulation_first_confirmed = 40
    print("day for first 100 confirmed = ", self.simulation_first_confirmed)

    # control frequency and time
    # number_days_per_control_change = 7
    # Align hospitalization data with the simulation (shifted by the lag).
    day = self.simulation_first_confirmed + self.lag_hospitalized
    self.loc = np.arange(day, day + len(self.data_hospitalized))
    # print("self.loc = ", self.loc)
    number_days = self.simulation_first_confirmed + len(self.data_confirmed)
    number_control_change_times = number_days
    # Daily grids spanning the truncated horizon.
    t_total = np.linspace(0, number_days, number_days + 1)
    t_control = np.linspace(0, number_days, number_days + 1)
    self.t_total = t_total
    self.t_control = t_control
    self.dimension = len(self.t_control)
    # Reset the time-dependent control to a constant initial guess.
    alpha = 0.1 * np.ones(self.dimension)
    self.controls = (alpha, q, tau, HFR, kappa, beta, delta, sigma, eta_I,
                     eta_Q, mu, gamma_I, gamma_A, gamma_H, gamma_Q)
    flat_args, self.unflatten = flatten(self.controls)
    self.configurations = (y0, t_total, N_total, number_group,
                           population_proportion, t_control,
                           number_days_per_control_change,
                           number_control_change_times,
                           number_time_dependent_controls)
def print_perf_tgcn(params, iter, gradient):
    """Every num_batches iterations, report accuracies, the squared gradient
    norm, and both confusion matrices."""
    if iter % num_batches != 0:
        return
    train_acc, train_cm = accuracy_tgcn(params, train_data, train_labels)
    test_acc, test_cm = accuracy_tgcn(params, test_data, test_labels)
    print("{:15}|{:20.6}|{:20.6}".format(iter // num_batches,
                                         train_acc, test_acc))
    flat_grad, _ = flatten(gradient)
    print('{:1.3e}'.format(np.dot(flat_grad, flat_grad)))
    print(train_cm)
    print(test_cm)
def grad_odeint(yt, func, y0, t, func_args, **kwargs):
    # Extended from "Scalable Inference of Ordinary Differential
    # Equation Models of Biochemical Processes", Sec. 2.4.2
    # Fabian Froehlich, Carolin Loos, Jan Hasenauer, 2017
    # https://arxiv.org/abs/1711.08079
    #
    # Returns vjp_all(g), mapping an upstream gradient g (T x D, one row per
    # observation time) to vector-Jacobian products w.r.t.
    # (func, y0, t, func_args), computed by integrating an augmented ODE
    # backwards between consecutive observation times.
    T, D = np.shape(yt)
    flat_args, unflatten = flatten(func_args)

    def flat_func(y, t, flat_args):
        # func with its extra arguments packed into a single flat vector.
        return func(y, t, *unflatten(flat_args))

    def unpack(x):
        # y, vjp_y, vjp_t, vjp_args
        return x[0:D], x[D:2 * D], x[2 * D], x[2 * D + 1:]

    def augmented_dynamics(augmented_state, t, flat_args):
        # Original system augmented with vjp_y, vjp_t and vjp_args.
        y, vjp_y, _, _ = unpack(augmented_state)
        vjp_all, dy_dt = make_vjp(flat_func, argnum=(0, 1, 2))(y, t, flat_args)
        vjp_y, vjp_t, vjp_args = vjp_all(-vjp_y)
        return np.hstack((dy_dt, vjp_y, vjp_t, vjp_args))

    def vjp_all(g):
        vjp_y = g[-1, :]  # adjoint state seeded at the final observation
        vjp_t0 = 0
        time_vjp_list = []
        vjp_args = np.zeros(np.size(flat_args))

        for i in range(T - 1, 0, -1):
            # Compute effect of moving measurement time.
            vjp_cur_t = np.dot(func(yt[i, :], t[i], *func_args), g[i, :])
            time_vjp_list.append(vjp_cur_t)
            vjp_t0 = vjp_t0 - vjp_cur_t

            # Run augmented system backwards to the previous observation.
            aug_y0 = np.hstack((yt[i, :], vjp_y, vjp_t0, vjp_args))
            aug_ans = odeint(augmented_dynamics, aug_y0,
                             np.array([t[i], t[i - 1]]),
                             tuple((flat_args,)), **kwargs)
            _, vjp_y, vjp_t0, vjp_args = unpack(aug_ans[1])

            # Add gradient from current output.
            vjp_y = vjp_y + g[i - 1, :]

        time_vjp_list.append(vjp_t0)
        # Reverse to restore forward time order.
        vjp_times = np.hstack(time_vjp_list)[::-1]
        return None, vjp_y, vjp_times, unflatten(vjp_args)

    return vjp_all
def log_gaussian(params, scale):
    """Log-density of a zero-mean isotropic Gaussian over all flattened params."""
    flat, _ = flatten(params)
    return np.sum(norm.logpdf(flat, 0, scale))
def f(x, y):
    """Sum of every leaf value of x and y after flattening them together."""
    flat_xy, _ = flatten([x, y])
    return np.sum(flat_xy)
def unflatten_tracing():
    """The unflattener must be traceable by autograd's make_vjp."""
    sample = [npr.randn(4), [npr.randn(3, 4), 2.5], (),
              (2.0, [1.0, npr.randn(2)])]
    vec, rebuild = flatten(sample)
    vjp, _ = make_vjp(lambda v: rebuild(v))(vec)
    assert np.all(vec == vjp(sample))
def test_flatten_empty():
    """Containers holding an empty tuple still round-trip through flatten."""
    nested = (npr.randn(4), [npr.randn(3, 4), 2.5], (),
              (2.0, [1.0, npr.randn(2)]))
    vec, unflatten = flatten(nested)
    assert np.all(vec == flatten(unflatten(vec))[0])
def test_flatten_complex():
    """Complex scalars should survive a flatten/unflatten round trip."""
    z = 1 + 1j
    vect, unflatten = flatten(z)
    assert np.all(z == unflatten(vect))