class Test(unittest.TestCase):
    """Sampling test wiring an EpisodeWithStepsSampler to a MountainCar environment."""

    # Test that sampler and data managers are set up correctly
    def setUp(self):
        """Create the sampler, its episode data manager and the environment hooks."""
        horizon = 20  # used for the settings property and the termination function

        settings = SettingsManager.getDefaultSettings()
        settings.setProperty('numTimeSteps', horizon)

        sampler = EpisodeWithStepsSampler(samplerNameSteps='steps')
        self.sampler = sampler
        sampler.numSamples = 3

        self.dataManager = sampler.getEpisodeDataManager()
        sampler.stepSampler.setTerminationFunction(
            NumStepsTerminationFunction(
                self.dataManager, None, numTimeSteps=horizon))

        # The environment object supplies every callback the sampler needs.
        mountainCar = MountainCar(self.dataManager)
        sampler.setContextSampler(mountainCar.sampleContext)
        sampler.setActionPolicy(mountainCar.sampleAction)
        sampler.setTransitionFunction(mountainCar.transitionFunction)
        sampler.setRewardFunction(mountainCar.sampleReward)
        sampler.setInitStateSampler(mountainCar.sampleInitState)

    def test_sampleData(self):
        # Runs the sampler on a [3, 20] data object with parallel sampling
        # switched on and prints the entries at index [1, :]; this test
        # contains no assertions.
        self.sampler.setParallelSampling(True)
        sampledData = self.dataManager.createDataObject([3, 20])

        self.sampler >> sampledData[...]

        print('States:', sampledData[1, :].states)
        print('NextStates:', sampledData[1, :].nextStates)
class Test(unittest.TestCase):
    """Checks the data entries registered on the sampler's step data manager."""

    def setUp(self):
        """Create a default sampler, its episode data manager and a TransitionFunction."""
        sampler = EpisodeWithStepsSampler()
        self.sampler = sampler
        self.dataManager = sampler.getEpisodeDataManager()
        self.tf = TransitionFunction(self.dataManager, 2, 1)

    # Test that sampler and data managers are set up correctly
    def test_init(self):
        episodeManager = self.sampler.getEpisodeDataManager()
        stepLevelManager = episodeManager.subDataManager

        # The check for 'nextStates' is currently disabled:
        # self.assertIn('nextStates', stepLevelManager.dataEntries)
        for entryName in ('states', 'actions'):
            self.assertIn(entryName, stepLevelManager.dataEntries)

        # Plain attribute read; the value is not used any further in the
        # visible code.
        _ = self.sampler.stepSampler
class Test(unittest.TestCase):
    """Sampling test for the DoubleLink environment with dummy actions and rewards."""

    def setUp(self):
        """Wire the sampler to DoubleLink dynamics and the dummy action/reward source."""
        sampler = EpisodeWithStepsSampler()
        self.sampler = sampler
        self.episodeManager = sampler.getEpisodeDataManager()

        dynamics = DoubleLink(self.episodeManager)

        sampler.stepSampler.setIsActiveSampler(
            StepBasedEpisodeTerminationSampler(
                self.episodeManager, 'steps', 40))

        initSampler = InitialStateSamplerStandard(self.episodeManager)
        dummy = DummyActionAndReward(
            self.episodeManager.subDataManager, 2, True)

        sampler.setTransitionFunction(dynamics.getExpectedNextState)
        sampler.setInitStateSampler(initSampler.sampleInitState)
        sampler.setActionPolicy(dummy.sampleAction)
        # The same dummy reward callback serves as reward and return function.
        sampler.setRewardFunction(dummy.sampleReward)
        sampler.setReturnFunction(dummy.sampleReward)

    def testGenerating(self):
        # The data object is requested with size 10 while numSamples is set
        # to 100; the assertions below expect 100 episodes of 40 steps with
        # 4 state dimensions after sampling.
        episodes = self.episodeManager.getDataObject(10)

        self.sampler.numSamples = 100
        self.sampler.setParallelSampling(True)
        episodes[...] >> self.sampler

        acrossEpisodes = episodes[:, 1].states.shape
        self.assertEqual(acrossEpisodes, (100, 4))

        withinEpisode = episodes[1, :].states.shape
        self.assertEqual(withinEpisode, (40, 4))
class Test(unittest.TestCase):
    """Sampling test for the Pendulum environment with dummy actions and rewards."""

    def setUp(self):
        """Configure the default settings, then wire sampler and environment.

        Each default-settings property is assigned exactly once, in the
        order listed below.
        """
        defaultSettings = SettingsManager.getDefaultSettings()

        # (property name, value) pairs applied in this order. The range
        # arrays evaluate to [0, -2] and [2*pi, 2].
        settingsToApply = (
            ('noiseStd', 1.0),
            ('initialStateDistributionMinRange',
             np.asarray([np.pi - np.pi, -2])),
            ('initialStateDistributionMaxRange',
             np.asarray([np.pi + np.pi, 2])),
            ('initialStateDistributionType', 'Uniform'),
            ('dt', 0.025),
            ('initSigmaActions', 1.0),
        )
        for propertyName, propertyValue in settingsToApply:
            defaultSettings.setProperty(propertyName, propertyValue)

        self.sampler = EpisodeWithStepsSampler()
        self.episodeManager = self.sampler.getEpisodeDataManager()
        self.stepManager = self.episodeManager.subDataManager

        self.pendulum = Pendulum(self.episodeManager)

        self.sampler.stepSampler.setIsActiveSampler(
            StepBasedEpisodeTerminationSampler(
                self.episodeManager, 'bla', numTimeSteps=40))

        initialStateSampler = InitialStateSamplerStandard(self.episodeManager)
        dummyActionAndReward = DummyActionAndReward(
            self.episodeManager.subDataManager, 1)

        self.sampler.setTransitionFunction(self.pendulum.getExpectedNextState)
        self.sampler.setInitStateSampler(initialStateSampler.sampleInitState)
        self.sampler.setActionPolicy(dummyActionAndReward.sampleAction)
        # The same dummy reward callback serves as reward and return function.
        self.sampler.setRewardFunction(dummyActionAndReward.sampleReward)
        self.sampler.setReturnFunction(dummyActionAndReward.sampleReward)

    def testGenerating(self):
        # The data object is requested with size 10 while numSamples is set
        # to 100; the assertions below expect 100 episodes of 40 steps with
        # 2 state dimensions after sampling.
        data = self.episodeManager.getDataObject(10)

        self.sampler.numSamples = 100
        self.sampler.setParallelSampling(True)
        data >> self.sampler

        self.assertEqual(data[:, 1].states.shape, (100, 2))
        self.assertEqual(data[1, :].states.shape, (40, 2))
# NOTE(review): this excerpt starts in the middle of a definition. `getAction`
# takes `self`, so it presumably belongs to a class whose header lies above
# the visible text (likely the TestPolicy instantiated below) - confirm
# against the full file and indent accordingly.
@Mapping.MappingMethod()
def getAction(self, states):
    """Return ``states`` times 2, multiplied by a length-2 vector of ones."""
    doubledStates = states * 2
    return doubledStates * np.ones(2)


class TestReward(Mapping):
    """Mapping registered with ['states', 'actions'] and ['rewards'].

    NOTE(review): the two lists are presumably the input and output entry
    names of the mapping - confirm against the Mapping base class.
    """

    def __init__(self, dataManager):
        Mapping.__init__(
            self,
            dataManager,
            ['states', 'actions'],
            ['rewards'],
        )

    @Mapping.MappingMethod()
    def getReward(self, states, actions):
        """Return twice the first column of ``states``; ``actions`` is not used."""
        firstColumn = states[:, 0:1]  # slice 0:1 keeps the column axis
        return firstColumn * 2


# --- Script: build a sampler and run it once on a [1, 20] data object ---
sampler = EpisodeWithStepsSampler()
dataManager = sampler.getEpisodeDataManager()

# All three helpers are constructed from the same episode data manager.
environment = TestEnvironment(dataManager)
policy = TestPolicy(dataManager)
reward = TestReward(dataManager)

# The environment object itself is passed as the transition function; only
# its `initState` attribute is passed for the initial-state sampler.
sampler.setInitStateSampler(environment.initState)
sampler.setTransitionFunction(environment)
sampler.setActionPolicy(policy)
sampler.setRewardFunction(reward)

data = dataManager.createDataObject([1, 20])

# Chained `>>` evaluates left to right: (data[...] >> sampler) >> data.
data[...] >> sampler >> data

states = data.getDataEntry('states', 1)