import theano.tensor as tt
import theano.tensor.nlinalg as tn
import utils  # project-local helpers: grad, hessian, shape, vector, Maximizer


def run_irl(world, car, reward, theta, data):
    def gen():
        # Load each demonstration into the cars' shared trajectory
        # variables (initial state x0 and control sequence u).
        for point in data:
            for c, x0, u in zip(world.cars, point['x0'], point['u']):
                c.traj.x0.set_value(x0)
                for cu, uu in zip(c.traj.u, u):
                    cu.set_value(uu)
            yield
    r = car.traj.reward(reward)
    g = utils.grad(r, car.traj.u)
    H = utils.hessian(r, car.traj.u)
    I = tt.eye(utils.shape(H)[0])
    reg = utils.vector(1)
    reg.set_value([1e-1])
    # Regularize the Hessian so that -H stays positive definite.
    H = H - reg[0] * I
    # Laplace-approximation log-likelihood of the demonstrations:
    # g' H^{-1} g + log det(-H).
    L = tt.dot(g, tt.dot(tn.MatrixInverse()(H), g)) + tt.log(tn.Det()(-H))
    # Run through the data once so the shared variables hold valid values.
    for _ in gen():
        pass
    optimizer = utils.Maximizer(L, [theta], gen=gen, method='gd', eps=0.1,
                                debug=True, iters=1000, inf_ignore=10)
    optimizer.maximize()
    print(theta.get_value())
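# A minimal NumPy sketch (not from the source) of the objective maximized
# above: for a toy quadratic reward, the Laplace approximation of the
# demonstration log-likelihood is, up to constants, g' H^{-1} g + log det(-H),
# the expression assigned to L. The toy reward is an assumption made purely to
# exercise the formula; only the 1e-1 regularizer mirrors the snippet.
import numpy as np


def laplace_log_likelihood(g, H):
    # g: reward gradient at the demonstrated controls; H: regularized Hessian.
    return g @ np.linalg.inv(H) @ g + np.log(np.linalg.det(-H))


u_demo = np.array([0.3, -0.1])
H_reward = -np.eye(2)              # Hessian of the toy reward r(u) = -0.5 u'u
g = H_reward @ u_demo              # gradient of that reward at u_demo
print(laplace_log_likelihood(g, H_reward - 1e-1 * np.eye(2)))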
import theano.tensor as T
import theano.tensor.nlinalg as L


def minus_two_log_gauss_likelihood_2D(residuals, covariance_values):
    """Computes the -2 log Gaussian likelihood (ignoring the constant term)
    from the given residuals and covariance values in the form of a 1D
    array: [variance1, covariance, variance2]."""
    cov = covariance_matrix_2D(covariance_values)
    det = L.Det()(cov)
    precis = L.MatrixInverse()(cov)
    # Mahalanobis term r' Sigma^{-1} r, then the log-determinant term.
    term1 = T.dot(T.transpose(residuals), T.dot(precis, residuals))
    term2 = T.log(det)
    return term1 + term2
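# `covariance_matrix_2D` is not defined above; a plausible sketch, assuming it
# packs [variance1, covariance, variance2] into a symmetric 2x2 matrix:
def covariance_matrix_2D(covariance_values):
    v1 = covariance_values[0]
    c = covariance_values[1]
    v2 = covariance_values[2]
    # Symmetric 2x2 covariance matrix [[v1, c], [c, v2]].
    return T.stacklists([[v1, c], [c, v2]])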
import theano.tensor as T
from theano.tensor import nlinalg


def dima(x):
    # Negative log-determinant of the Gram matrix x' x.
    return -T.log(nlinalg.Det()(T.dot(x.T, x)))
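# Usage sketch (not from the source): compile `dima` and evaluate it on a
# random matrix; -log det(x'x) decreases as the columns of x grow larger
# and less correlated.
import numpy as np
import theano

x = T.matrix('x')
f = theano.function([x], dima(x))
print(f(np.random.randn(5, 3).astype(theano.config.floatX)))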
import theano.tensor as T
import theano.tensor.nlinalg as TNL

# Fragment of a larger loss function: `rtransform` (the transformed signal),
# `routput` (the prediction), and the condition of the branch below come from
# omitted surrounding code; `continuous` is an assumed name for that flag.
if continuous:
    residual = rtransform - routput
    # For covariance, we want to subtract out the means...
    sigmean = rtransform.mean(axis=(0, 2), keepdims=True)
    epsmean = residual.mean(axis=(0, 2), keepdims=True)
    rtdelta = rtransform - sigmean
    epsdelta = residual - epsmean
    # Covariance matrices across the feature axis.
    sig_cov = T.tensordot(rtdelta, rtdelta, axes=((0, 2), (0, 2))) \
        / (rtransform.shape[0] * rtransform.shape[2])
    eps_cov = T.tensordot(epsdelta, epsdelta, axes=((0, 2), (0, 2))) \
        / (residual.shape[0] * residual.shape[2])
    det_sig = TNL.Det()(sig_cov) + 1e-48
    det_eps = TNL.Det()(eps_cov) + 1e-48
    entropy = T.log(det_sig)
    info = T.log(det_eps)
    # The first two terms give the entropy contrast, but we'd also like the
    # predictions to be correct (as opposed to off by a constant), so we add
    # a third term to encourage the mean residual to be zero.
    loss = info - entropy + 1e-2 * (epsmean ** 2).mean()
else:
    # The entropy term measures the entropy of the average transformed
    # signal; we want to make this large.
    entropy = -1 * (rtransform.mean(axis=(0, 2))
                    * T.log(rtransform.mean(axis=(0, 2)) + 1e-6)).sum()
    # The info term measures the error of the predictor (standard
    # cross-entropy between the prediction and the true distribution);
    # we want to make this small.
    info = -1 * ((rtransform * T.log(routput + 1e-6)).mean(axis=(0, 2))).sum()
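# Sanity-check sketch (not from the source): the tensordot above contracts
# the batch and time axes, leaving an empirical feature-by-feature covariance.
# A NumPy equivalent for a (batch, features, time) array:
import numpy as np

rt = np.random.randn(4, 3, 7)                 # (batch, features, time)
delta = rt - rt.mean(axis=(0, 2), keepdims=True)
cov = np.tensordot(delta, delta, axes=((0, 2), (0, 2))) / (rt.shape[0] * rt.shape[2])
print(np.log(np.linalg.det(cov) + 1e-48))     # the `entropy` term above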