def update(self, state): """ Update central values of latent distributions by minimizing the VARIATIONAL LAPLACE ENCODED FREE ENERGY via gradient descent. 1) VARIATIONAL LAPLACE ENCODED FREE ENERGY F = -log p(s, mu) + Consts 2) Facorized p(s, mu) p(s, mu) = p(sp|mu)*p(sv|mu)*p(dmu|mu, pho) Args: state: (float, float, float) joint angle, visual x, visual y Returns action: (float) updated angular velocity """ # generate fake sensory states (useless at the moment) gstate = self.generate() # current state sp, sv = state[0], state[1:] self.sp = sp if self.sp is None else self.sp self.sv = sv if self.sv is None else self.sv # dynamics step df = self.dynamics(self.f, self.rho * self.fh) # modify central value of latent variable through gradient descent dmu = \ + (sp - self.mu) / self.sp_sigma2 \ + np.dot(np.dot(self.inv_sv_sigma2, dg(self.mu)), (sv - g(self.mu))) \ + self.f[1] * (self.dmu - self.f[0]) / self.sp_sigma2 # modify first order of central value of latent variable through # gradient descent ddmu = (self.f[0] - self.dmu) / self.sm_sigma2 # modify action variable through gradient descent dsp = self.h * (1 / self.dynamics.k) dsv = self.h * (1 / self.dynamics.k) * np.ones_like(sv) self.da = \ - dsp * (sp - self.mu) / self.sp_sigma2 \ - np.dot(np.dot(self.inv_sv_sigma2, dsv), (sv - g(self.mu))) # updates self.dmu += self.h * ddmu self.a += self.h * self.da self.mu += self.dmu + self.h * dmu self.f += self.h * df # store state of the step as the previous state of next step self.sp, self.sv = sp, sv.copy() return self.a
def generateSensoryData(self):
    """Sample a noisy sensory state from the latent distribution.

    Returns:
        (float, float, float) joint angle, visual x, visual y
    """
    # noiseless internal state: joint angle plus arm-tip position
    self.istate = [self.mu, *self.arm_length * g(self.mu)]
    # per-channel noise scales: one proprioceptive, two visual
    sigmas = [self.sp_sigma, self.sv_sigma, self.sv_sigma]
    noise = self.rng.randn(3) * sigmas
    return noise + self.istate
def generate(self):
    """Generate a 'fake' sensory state from internal distributions.

    Returns:
        generated sensory state:
            (float, float, float) joint angle, visual x, visual y
    """
    gstate = np.zeros(3)
    gstate[0] = self.sp_sigma * self.rng.randn() + self.mu
    # Use the object's seeded RNG (was np.random.multivariate_normal),
    # so results are reproducible and consistent with generateSensoryData.
    # NOTE(review): sv_sigma is passed as a covariance matrix here but is
    # used as a scalar std-dev elsewhere — confirm its shape.
    gstate[1:] = self.rng.multivariate_normal(
        self.arm_length * g(self.mu), self.sv_sigma)
    self.gstate = gstate
    return gstate
def simulation():
    """Run the generative process/model loop and plot the arm states."""
    # initial configuration of environment and agent
    real_mu = 0 * np.pi
    model_mu = 0 * np.pi
    model_rho = -0.35 * np.pi
    stime = 50000
    rng = np.random.RandomState()
    plotter = Plotter(time_window=stime)

    # the generative model (agent) and the generative process (environment)
    gprocess = Env(rng)
    gmodel = Model(rng, mu=model_mu, rho=model_rho)
    state = gprocess.reset(mu=real_mu)

    for step in range(stime):
        # gradient-descent update of the model yields the next action
        action = gmodel.update(state)
        # fake sensory state generated by the model during the update
        gstate = gmodel.gstate
        # apply the action to the environment
        state = gprocess.step(action)

        plotter.append_mu(gprocess.mu, gmodel.mu)
        # refresh the arm plots every 1000 steps and on the final step
        is_last = step == stime - 1
        if is_last or step % 1000 == 0:
            plotter.sensed_arm.update(state[0], state[1:])
            plotter.real_arm.update(gprocess.istate[0], gprocess.istate[1:])
            plotter.generated_arm.update(gstate[0], gstate[1:])
            plotter.target_arm.update(gmodel.rho, g(gmodel.rho))
            plotter.update()

    input("Press any button to close.")
def test_g_3(self):
    """funcs.g should map (3, 3) to 2."""
    observed = funcs.g(3, 3)
    self.assertEqual(observed, 2)
def test_g_2(self):
    """g(2.0, 2.0) should be approximately 4/3."""
    value = funcs.g(2.0, 2.0)
    self.assertAlmostEqual(value, 1.3333333333333)
def test_g_1(self):
    """g(1, 0) should be approximately 1/3."""
    result = funcs.g(1, 0)
    self.assertAlmostEqual(result, 0.333333333333333)
def test_f3(self):
    """g(1, 1) should equal 2."""
    self.assertEqual(funcs.g(1, 1), 2)
def test_f_2(self):
    """Check g on two sample input pairs."""
    for args, expected in [((5, 6), 61), ((2, 3), 13)]:
        self.assertEqual(funcs.g(*args), expected)
def test_g_1(self):
    """g(1, 2) should equal 5."""
    # dead `pass` after the assertion removed; PEP 8 spacing applied
    self.assertEqual(funcs.g(1, 2), 5)
def test_g_1(self):
    """g(1, 1) should equal 2."""
    # assertEquals is a deprecated alias of assertEqual; dead `pass` removed
    self.assertEqual(funcs.g(1, 1), 2)
def test_g_2(self):
    """g(56, 12) should equal 3280."""
    expected = 3280
    self.assertEqual(funcs.g(56, 12), expected)
def test_g_1(self):
    """g(2, 3) should equal 13."""
    actual = funcs.g(2, 3)
    self.assertEqual(actual, 13)
def test_f_1(self):
    """Smoke-test f, g, hypotenuse and is_positive on simple inputs."""
    self.assertEqual(funcs.f(1), 9)
    self.assertEqual(funcs.g(1, 1), 2)
    self.assertEqual(funcs.hypotenuse(3, 4), 5)
    # assertTrue is the idiomatic form of assertEqual(..., True)
    self.assertTrue(funcs.is_positive(3.50))
def test_f_2(self):
    """Test f, g, hypotenuse and is_positive on sample inputs."""
    self.assertEqual(funcs.f(3.0), 69)
    self.assertEqual(funcs.g(67, -2), 4493)
    # exact == on a computed float is fragile; compare within tolerance
    self.assertAlmostEqual(funcs.hypotenuse(4.7, 6.9), 8.3486525858967209)
    # assertFalse is the idiomatic form of assertEqual(..., False)
    self.assertFalse(funcs.is_positive(-3.4123849394))
def test_g(self):
    """g at (1, 1) and (2, 1)."""
    # stale scaffold comment and dead `pass` removed
    self.assertEqual(funcs.g(1, 1), 2)
    self.assertEqual(funcs.g(2, 1), 5)
def test_f4(self):
    """g(0, 0) should equal 0."""
    self.assertEqual(funcs.g(0, 0), 0)
def test_f_2(self):
    """g(1, 2) should equal 5."""
    result = funcs.g(1, 2)
    self.assertEqual(result, 5)
def test_g_2(self):
    """g(3, 3) should equal 2."""
    observed = g(3, 3)
    self.assertEqual(observed, 2)
def test_f_2a(self):
    """g(-100, 23) should equal 10529."""
    expected = 10529
    self.assertEqual(funcs.g(-100, 23), expected)
def test_f_2(self):
    """g on (2, 2) and (3, 3)."""
    for x, expected in ((2, 8), (3, 18)):
        self.assertEqual(g(x, x), expected)
# Build the generative model (agent) and configure the noise levels.
# rng, model_mu, model_rho, gprocess and the *_sigma values are defined
# earlier in the script (outside this chunk).
gmodel = Model(rng, mu=model_mu, rho=model_rho)
gprocess.set_sigma(gprocess_sigma)
gmodel.set_sigma(gmodel_sigma)

# %% Iteration Loop
state = gprocess.reset(mu=real_mu)
for t in range(stime):
    # Update model via gradient descent and get action
    action = gmodel.update(state)
    # Generated fake sensory state from model (set by gmodel.update)
    gstate = gmodel.gstate
    # do action: advance the environment with the model's action
    state = gprocess.step(action)
    # update plot every n steps (and on the final step)
    plotter.append_mu(gprocess.mu, gmodel.mu)
    if t % 1000 == 0 or t == stime-1:
        plotter.sensed_arm.update(state[0], state[1:])
        plotter.real_arm.update(gprocess.istate[0], gprocess.istate[1:])
        plotter.generated_arm.update(gstate[0], gstate[1:])
        plotter.target_arm.update(gmodel.rho, g(gmodel.rho))
        plotter.update()
input("Press any button to close.")
def test_g_2(self):
    """g(3, 0) should equal 1."""
    actual = funcs.g(3, 0)
    self.assertEqual(actual, 1)
def test_g_2(self):
    """g(2, 1) should equal 5."""
    # dead `pass` after the assertion removed; PEP 8 spacing applied
    self.assertEqual(funcs.g(2, 1), 5)
def test_g_3(self):
    """g(2, 7) should be approximately 8.833333333."""
    result = funcs.g(2, 7)
    self.assertAlmostEqual(result, 8.833333333)
def test_g_1(self):
    """g(1, 2) should equal 5/3."""
    # stale scaffold comment ("Add code here. REMOVE PASS") removed
    self.assertEqual(funcs.g(1, 2), 5/3)
def test_g_2(self):
    """g(0, 0) should equal 0."""
    # assertEquals is a deprecated alias of assertEqual; dead `pass` removed
    self.assertEqual(funcs.g(0, 0), 0)