def createExperimentInstance():
    """Assemble the MountainCarContinuous-v0 experiment and return it.

    The raw gym environment is wrapped in a GymTask, its environment gets an
    EnvTransformation built from an observation digitizer and an action
    digitizer, and a zero-initialized ActionValueTable is handed to
    ``createAgent``. The resulting Experiment is wrapped in a
    ProcessExperiment together with ``doSingleExperiment`` and returned.
    """
    raw_env = gym.make('MountainCarContinuous-v0')

    # Digitizers for the two observation components and for the action.
    position_bins = Digitizer.buildBins(-1.2, 0.6, 16)
    velocity_bins = Digitizer.buildBins(-0.07, 0.07, 4)
    action_digitizer = Digitizer.build(-1.0, 1.0, 5, True)

    # Debug output, disabled:
    # print("Cart position bins:", position_bins)
    # print("Cart velocity bins:", velocity_bins)
    # print("Cart force bins:", action_digitizer.bins, action_digitizer.possibleValues())

    observation_digitizer = ArrayDigitizer([position_bins, velocity_bins])
    env_transformation = EnvTransformation(observation_digitizer,
                                           action_digitizer)

    task = GymTask.createTask(raw_env)
    task.env.setTransformation(env_transformation)
    # task.env.setCumulativeRewardMode()   # disabled

    # The value table has one axis sized by the observation digitizer's
    # ``states`` and one by the action digitizer's ``states``; every entry
    # starts at 0.0.
    value_table = ActionValueTable(observation_digitizer.states,
                                   action_digitizer.states)
    value_table.initialize(0.0)
    # Alternative (disabled): random initialization
    # value_table.initialize( np.random.rand( value_table.paramdim ) )

    # The learner is chosen inside createAgent. Per the original notes,
    # SARSA(), Q() or QLambda() can be used there, with
    #   alpha -- learning rate (preference of new information)
    #   gamma -- discount factor (importance of future reward)
    agent = createAgent(value_table)

    base_experiment = Experiment(task, agent)
    process_experiment = ProcessExperiment(base_experiment, doSingleExperiment)
    return process_experiment
def test_values_noEdges_5(self):
    """Check ``values`` of ``Digitizer.build(-1.0, 1.0, 5, False)``.

    The five values must equal -0.6, -0.3, 0.0, 0.3, 0.6 to 3 decimals.
    """
    expected_values = [-0.6, -0.3, 0.0, 0.3, 0.6]
    built = Digitizer.build(-1.0, 1.0, 5, False)
    npt.assert_array_almost_equal(built.values, expected_values, decimal=3)
def test_build_edge_5(self):
    """Check ``Digitizer.build(0.0, 10.0, 5, True)``.

    Asserts that ``states`` is 5, that ``bins`` are the four boundaries
    0, 3.333, 6.667, 10 and that ``values`` are 0, 2.5, 5, 7.5, 10
    (arrays compared to 3 decimals).
    """
    expected_bins = [0., 3.333, 6.667, 10.]
    expected_values = [0., 2.5, 5., 7.5, 10.]

    built = Digitizer.build(0.0, 10.0, 5, True)

    self.assertEqual(built.states, 5)
    npt.assert_array_almost_equal(built.bins, expected_bins, decimal=3)
    npt.assert_array_almost_equal(built.values, expected_values, decimal=3)
def test_values_edges_5(self):
    """Check ``values`` of ``Digitizer.build(-1.0, 1.0, 5, True)``.

    The five values must equal -1.0, -0.5, 0.0, 0.5, 1.0 to 3 decimals.
    """
    expected_values = [-1.0, -0.5, 0.0, 0.5, 1.0]
    built = Digitizer.build(-1.0, 1.0, 5, True)
    npt.assert_array_almost_equal(built.values, expected_values, decimal=3)
def test_build_noEdge_5(self):
    """Check ``Digitizer.build(0.0, 10.0, 5, False)``.

    Asserts that ``states`` is 5, that ``bins`` are the four boundaries
    2, 4, 6, 8 and that ``values`` are 2, 3.5, 5, 6.5, 8
    (arrays compared to 3 decimals).
    """
    expected_bins = [2., 4., 6., 8.]
    expected_values = [2., 3.5, 5., 6.5, 8.]

    built = Digitizer.build(0.0, 10.0, 5, False)

    self.assertEqual(built.states, 5)
    npt.assert_array_almost_equal(built.bins, expected_bins, decimal=3)
    npt.assert_array_almost_equal(built.values, expected_values, decimal=3)