def setUp(self):
    """Configure default settings and wire a pendulum episode sampler.

    Registers the pendulum-related properties on the global default
    settings, then builds an EpisodeWithStepsSampler using the Pendulum
    expected-next-state transition, a standard initial-state sampler and
    dummy action/reward providers (also used as the return function).
    """
    defaultSettings = SettingsManager.getDefaultSettings()
    defaultSettings.setProperty('noiseStd', 1.0)
    # Uniform initial state over [0, 2*pi] x [-2, 2].
    defaultSettings.setProperty('initialStateDistributionMinRange',
                                np.asarray([np.pi - np.pi, -2]))
    defaultSettings.setProperty('initialStateDistributionMaxRange',
                                np.asarray([np.pi + np.pi, 2]))
    defaultSettings.setProperty('initialStateDistributionType', 'Uniform')
    defaultSettings.setProperty('dt', 0.025)
    defaultSettings.setProperty('initSigmaActions', 1.0)
    # NOTE(review): the original set the min/max range properties a second
    # time with identical values; the redundant duplicate pair was removed.

    self.sampler = EpisodeWithStepsSampler()
    self.episodeManager = self.sampler.getEpisodeDataManager()
    self.stepManager = self.episodeManager.subDataManager
    self.pendulum = Pendulum(self.episodeManager)
    self.sampler.stepSampler.setIsActiveSampler(
        StepBasedEpisodeTerminationSampler(self.episodeManager, 'bla',
                                           numTimeSteps=40))
    initialStateSampler = InitialStateSamplerStandard(self.episodeManager)
    dummyActionAndReward = DummyActionAndReward(
        self.episodeManager.subDataManager, 1)
    self.sampler.setTransitionFunction(self.pendulum.getExpectedNextState)
    self.sampler.setInitStateSampler(initialStateSampler.sampleInitState)
    self.sampler.setActionPolicy(dummyActionAndReward.sampleAction)
    self.sampler.setRewardFunction(dummyActionAndReward.sampleReward)
    self.sampler.setReturnFunction(dummyActionAndReward.sampleReward)
class testStepSampler(unittest.TestCase):
    """Checks that episode/step sampling fills the data object correctly."""

    def setUp(self):
        cfg = SettingsManager.getDefaultSettings()
        cfg.setProperty('numTimeSteps', 40)
        self.dataManager = DataUtil.createTestManagerSteps()
        self.stepSamplerEpisodes = EpisodeWithStepsSampler(
            self.dataManager, 'episodes', 'steps')
        # Todo: get function for data manager

    def tearDown(self):
        pass

    def testSampling(self):
        env = TestEnvironment(self.dataManager)
        pol = TestPolicy(self.dataManager)
        rew = TestReward(self.dataManager)

        sampler = self.stepSamplerEpisodes
        sampler.setInitStateSampler(env.initState)
        sampler.setTransitionFunction(env)
        sampler.setActionPolicy(pol)
        sampler.setRewardFunction(rew)

        data = self.dataManager.createDataObject([10, 100])
        data[Ellipsis] >> sampler

        states = data.getDataEntry('states', 1)
        actions = data.getDataEntry('actions', 2)
        rewards = data.getDataEntry('rewards', 3)

        # Expected state trajectory is 1..40 (one value per time step);
        # actions and rewards should each equal twice the states.
        expected = np.array(range(1, 41))
        self.assertTrue(
            (abs(states.transpose() - expected) < 0.00001).all())
        self.assertTrue((abs(states * 2 - actions) < 0.00001).all())
        self.assertTrue((abs(states * 2 - rewards) < 0.00001).all())
class Test(unittest.TestCase):
    """Sanity checks for the default EpisodeWithStepsSampler layout."""

    def setUp(self):
        self.sampler = EpisodeWithStepsSampler()
        self.dataManager = self.sampler.getEpisodeDataManager()
        self.tf = TransitionFunction(self.dataManager, 2, 1)

    # Test that sampler and data managers are set up correctly
    def test_init(self):
        stepManager = self.sampler.getEpisodeDataManager().subDataManager
        # The step-level data manager must expose the standard entries.
        for entry in ('states', 'actions'):
            self.assertIn(entry, stepManager.dataEntries)
        # self.assertIn('nextStates', stepManager.dataEntries)
        stepSampler = self.sampler.stepSampler
def setUp(self):
    """Wire a DoubleLink episode sampler with dummy action/reward providers."""
    self.sampler = EpisodeWithStepsSampler()
    self.episodeManager = self.sampler.getEpisodeDataManager()
    linkSystem = DoubleLink(self.episodeManager)

    # Terminate every episode after 40 steps.
    terminator = StepBasedEpisodeTerminationSampler(
        self.episodeManager, 'steps', 40)
    self.sampler.stepSampler.setIsActiveSampler(terminator)

    stateSampler = InitialStateSamplerStandard(self.episodeManager)
    actionReward = DummyActionAndReward(
        self.episodeManager.subDataManager, 2, True)

    self.sampler.setTransitionFunction(linkSystem.getExpectedNextState)
    self.sampler.setInitStateSampler(stateSampler.sampleInitState)
    self.sampler.setActionPolicy(actionReward.sampleAction)
    self.sampler.setRewardFunction(actionReward.sampleReward)
    self.sampler.setReturnFunction(actionReward.sampleReward)
def setUp(self):
    """Build a MountainCar sampler limited to 20 steps per episode."""
    SettingsManager.getDefaultSettings().setProperty('numTimeSteps', 20)

    self.sampler = EpisodeWithStepsSampler(samplerNameSteps='steps')
    self.sampler.numSamples = 3
    self.dataManager = self.sampler.getEpisodeDataManager()
    self.sampler.stepSampler.setTerminationFunction(
        NumStepsTerminationFunction(self.dataManager, None,
                                    numTimeSteps=20))

    env = MountainCar(self.dataManager)
    self.sampler.setContextSampler(env.sampleContext)
    self.sampler.setActionPolicy(env.sampleAction)
    self.sampler.setTransitionFunction(env.transitionFunction)
    self.sampler.setRewardFunction(env.sampleReward)
    self.sampler.setInitStateSampler(env.sampleInitState)
# NOTE(review): getAction appears to be a method of a policy class whose
# header is outside this view — confirm against the full file.
@Mapping.MappingMethod()
def getAction(self, states):
    # Deterministic test policy: action is twice the state, broadcast
    # against a length-2 ones vector.
    return states * 2 * np.ones(2)


class TestReward(Mapping):
    """Test reward mapping: reward is twice the first state column."""

    def __init__(self, dataManager):
        Mapping.__init__(self, dataManager, ['states', 'actions'],
                         ['rewards'])

    @Mapping.MappingMethod()
    def getReward(self, states, actions):
        return states[:, 0:1] * 2


# Script-level smoke run: wire the sampler with the test environment,
# policy and reward, then sample one episode of 20 steps into `data`.
sampler = EpisodeWithStepsSampler()
dataManager = sampler.getEpisodeDataManager()
environment = TestEnvironment(dataManager)
policy = TestPolicy(dataManager)
reward = TestReward(dataManager)
sampler.setInitStateSampler(environment.initState)
sampler.setTransitionFunction(environment)
sampler.setActionPolicy(policy)
sampler.setRewardFunction(reward)
data = dataManager.createDataObject([1, 20])
data[...] >> sampler >> data
class Test(unittest.TestCase):
    """Smoke test: parallel sampling of MountainCar episodes."""

    # Test that sampler and data managers are set up correctly
    def setUp(self):
        SettingsManager.getDefaultSettings().setProperty('numTimeSteps', 20)

        self.sampler = EpisodeWithStepsSampler(samplerNameSteps='steps')
        self.sampler.numSamples = 3
        self.dataManager = self.sampler.getEpisodeDataManager()
        self.sampler.stepSampler.setTerminationFunction(
            NumStepsTerminationFunction(self.dataManager, None,
                                        numTimeSteps=20))

        env = MountainCar(self.dataManager)
        self.sampler.setContextSampler(env.sampleContext)
        self.sampler.setActionPolicy(env.sampleAction)
        self.sampler.setTransitionFunction(env.transitionFunction)
        self.sampler.setRewardFunction(env.sampleReward)
        self.sampler.setInitStateSampler(env.sampleInitState)

    def test_sampleData(self):
        self.sampler.setParallelSampling(True)
        newData = self.dataManager.createDataObject([3, 20])
        self.sampler >> newData[...]
        print('States:', newData[1, :].states)
        print('NextStates:', newData[1, :].nextStates)
def setUp(self):
    """Create a step-based test data manager and an episode sampler."""
    cfg = SettingsManager.getDefaultSettings()
    cfg.setProperty('numTimeSteps', 40)
    self.dataManager = DataUtil.createTestManagerSteps()
    self.stepSamplerEpisodes = EpisodeWithStepsSampler(
        self.dataManager, 'episodes', 'steps')
def setUp(self):
    """Create a sampler and a transition function on its data manager."""
    sampler = EpisodeWithStepsSampler()
    self.sampler = sampler
    self.dataManager = sampler.getEpisodeDataManager()
    self.tf = TransitionFunction(self.dataManager, 2, 1)
class Test(unittest.TestCase):
    """Pendulum episode sampling: checks generated data shapes."""

    def setUp(self):
        """Configure pendulum settings and wire the episode sampler."""
        defaultSettings = SettingsManager.getDefaultSettings()
        defaultSettings.setProperty('noiseStd', 1.0)
        # Uniform initial state over [0, 2*pi] x [-2, 2].
        defaultSettings.setProperty('initialStateDistributionMinRange',
                                    np.asarray([np.pi - np.pi, -2]))
        defaultSettings.setProperty('initialStateDistributionMaxRange',
                                    np.asarray([np.pi + np.pi, 2]))
        defaultSettings.setProperty('initialStateDistributionType',
                                    'Uniform')
        defaultSettings.setProperty('dt', 0.025)
        defaultSettings.setProperty('initSigmaActions', 1.0)
        # NOTE(review): the original set the min/max range properties a
        # second time with identical values; the duplicates were removed.

        self.sampler = EpisodeWithStepsSampler()
        self.episodeManager = self.sampler.getEpisodeDataManager()
        self.stepManager = self.episodeManager.subDataManager
        self.pendulum = Pendulum(self.episodeManager)
        self.sampler.stepSampler.setIsActiveSampler(
            StepBasedEpisodeTerminationSampler(self.episodeManager, 'bla',
                                               numTimeSteps=40))
        initialStateSampler = InitialStateSamplerStandard(
            self.episodeManager)
        dummyActionAndReward = DummyActionAndReward(
            self.episodeManager.subDataManager, 1)
        self.sampler.setTransitionFunction(
            self.pendulum.getExpectedNextState)
        self.sampler.setInitStateSampler(
            initialStateSampler.sampleInitState)
        self.sampler.setActionPolicy(dummyActionAndReward.sampleAction)
        self.sampler.setRewardFunction(dummyActionAndReward.sampleReward)
        self.sampler.setReturnFunction(dummyActionAndReward.sampleReward)

    def testGenerating(self):
        """Sample 100 episodes of 40 steps and verify the state shapes."""
        data = self.episodeManager.getDataObject(10)
        self.sampler.numSamples = 100
        self.sampler.setParallelSampling(True)
        data >> self.sampler
        # 100 episodes, 2-dim state per step; 40 steps per episode.
        self.assertEqual(data[:, 1].states.shape, (100, 2))
        self.assertEqual(data[1, :].states.shape, (40, 2))
class Test(unittest.TestCase):
    """DoubleLink episode sampling: checks generated data shapes."""

    def setUp(self):
        self.sampler = EpisodeWithStepsSampler()
        self.episodeManager = self.sampler.getEpisodeDataManager()
        linkSystem = DoubleLink(self.episodeManager)

        # Terminate every episode after 40 steps.
        terminator = StepBasedEpisodeTerminationSampler(
            self.episodeManager, 'steps', 40)
        self.sampler.stepSampler.setIsActiveSampler(terminator)

        stateSampler = InitialStateSamplerStandard(self.episodeManager)
        actionReward = DummyActionAndReward(
            self.episodeManager.subDataManager, 2, True)

        self.sampler.setTransitionFunction(linkSystem.getExpectedNextState)
        self.sampler.setInitStateSampler(stateSampler.sampleInitState)
        self.sampler.setActionPolicy(actionReward.sampleAction)
        self.sampler.setRewardFunction(actionReward.sampleReward)
        self.sampler.setReturnFunction(actionReward.sampleReward)

    def testGenerating(self):
        data = self.episodeManager.getDataObject(10)
        self.sampler.numSamples = 100
        self.sampler.setParallelSampling(True)
        data[...] >> self.sampler
        # 100 episodes, 4-dim state per step; 40 steps per episode.
        self.assertEqual(data[:, 1].states.shape, (100, 4))
        self.assertEqual(data[1, :].states.shape, (40, 4))
defaultSettings.setProperty('dt', 0.025) defaultSettings.setProperty('initSigmaActions', 1e-10) # ... to sample the initial states ... defaultSettings.setProperty('initialStateDistributionType', 'Uniform') defaultSettings.setProperty('numTimeSteps', steps_per_epoch) # ... and for the image creating preprocessor. Note that the same settings object is used here which is then # distributed by the setting manager to all its clients. (The Pendulum, the InitalStateSampler as well as the # Preprocessor going to be setting clients img_size = 48 # pixels, height and width. (currently only squared images possible) defaultSettings.setProperty('imgSize', img_size) defaultSettings.setProperty('lineWidth', 3) # again pixel # Next we can create a "Sampler". It will later sample the values from the desired environment sampler = EpisodeWithStepsSampler() # We also need the data managers for the episode and step layer - we get them from the sampler episodeManager = sampler.getEpisodeDataManager() # Finally, we can create our environment. Try exchanging the DoubleLink with the QuadLink or the Pendulum... n_link_pendulum = Pendulum(episodeManager) # ... make sure you change the number of joints too number_of_joints = 1 # We can still add further settings, e.g. for the initialStateSampler created next defaultSettings.setProperty( 'initialStateDistributionMinRange', np.tile(np.asarray([np.pi - np.pi, -2]), [number_of_joints])) defaultSettings.setProperty( 'initialStateDistributionMaxRange',