Example #1
0
    def setUp(self):
        """Configure default settings and build a Pendulum episode sampler
        driven by dummy action/reward providers for testing.
        """
        defaultSettings = SettingsManager.getDefaultSettings()
        defaultSettings.setProperty('noiseStd', 1.0)
        # Uniform initial-state distribution over [pi - pi, pi + pi] x [-2, 2]
        defaultSettings.setProperty('initialStateDistributionMinRange',
                                    np.asarray([np.pi - np.pi, -2]))
        defaultSettings.setProperty('initialStateDistributionMaxRange',
                                    np.asarray([np.pi + np.pi, 2]))
        defaultSettings.setProperty('initialStateDistributionType', 'Uniform')
        defaultSettings.setProperty('dt', 0.025)
        defaultSettings.setProperty('initSigmaActions', 1.0)
        # NOTE(review): the original code set the min/max range properties a
        # second time with byte-identical values; the redundant calls were
        # removed.

        self.sampler = EpisodeWithStepsSampler()
        self.episodeManager = self.sampler.getEpisodeDataManager()
        self.stepManager = self.episodeManager.subDataManager
        self.pendulum = Pendulum(self.episodeManager)

        # Episodes are terminated after a fixed number of steps (40).
        self.sampler.stepSampler.setIsActiveSampler(
            StepBasedEpisodeTerminationSampler(self.episodeManager,
                                               'bla',
                                               numTimeSteps=40))

        initialStateSampler = InitialStateSamplerStandard(self.episodeManager)

        dummyActionAndReward = DummyActionAndReward(
            self.episodeManager.subDataManager, 1)

        self.sampler.setTransitionFunction(self.pendulum.getExpectedNextState)
        self.sampler.setInitStateSampler(initialStateSampler.sampleInitState)
        self.sampler.setActionPolicy(dummyActionAndReward.sampleAction)
        self.sampler.setRewardFunction(dummyActionAndReward.sampleReward)
        # Reward and return both come from the same dummy provider.
        self.sampler.setReturnFunction(dummyActionAndReward.sampleReward)
Example #2
0
class testStepSampler(unittest.TestCase):
    """Integration test: sample steps with EpisodeWithStepsSampler and check
    the generated states, actions and rewards against expected values."""

    def setUp(self):
        cfg = SettingsManager.getDefaultSettings()
        cfg.setProperty('numTimeSteps', 40)
        self.dataManager = DataUtil.createTestManagerSteps()
        self.stepSamplerEpisodes = EpisodeWithStepsSampler(
            self.dataManager, 'episodes', 'steps')

        # Todo: get function for data manager

    def tearDown(self):
        pass

    def testSampling(self):
        env = TestEnvironment(self.dataManager)
        pol = TestPolicy(self.dataManager)
        rew = TestReward(self.dataManager)

        self.stepSamplerEpisodes.setInitStateSampler(env.initState)
        self.stepSamplerEpisodes.setTransitionFunction(env)
        self.stepSamplerEpisodes.setActionPolicy(pol)
        self.stepSamplerEpisodes.setRewardFunction(rew)

        data = self.dataManager.createDataObject([10, 100])
        data[Ellipsis] >> self.stepSamplerEpisodes

        states = data.getDataEntry('states', 1)
        actions = data.getDataEntry('actions', 2)
        rewards = data.getDataEntry('rewards', 3)

        # States should count up 1..40; actions and rewards are 2 * states.
        expectedStates = np.arange(1, 41)
        tol = 0.00001
        self.assertTrue(
            (abs(states.transpose() - expectedStates) < tol).all())
        self.assertTrue((abs(states * 2 - actions) < tol).all())
        self.assertTrue((abs(states * 2 - rewards) < tol).all())
Example #3
0
class Test(unittest.TestCase):
    """Checks that the sampler and its data managers are wired up correctly."""

    def setUp(self):
        self.sampler = EpisodeWithStepsSampler()
        self.dataManager = self.sampler.getEpisodeDataManager()
        self.tf = TransitionFunction(self.dataManager, 2, 1)

    def test_init(self):
        # The step-level data manager must expose the standard entries.
        stepManager = self.sampler.getEpisodeDataManager().subDataManager
        self.assertIn('states', stepManager.dataEntries)
        self.assertIn('actions', stepManager.dataEntries)
        # self.assertIn('nextStates', stepManager.dataEntries)

        stepSampler = self.sampler.stepSampler
Example #4
0
    def setUp(self):
        """Build a DoubleLink episode sampler with dummy actions and rewards."""
        self.sampler = EpisodeWithStepsSampler()
        self.episodeManager = self.sampler.getEpisodeDataManager()
        doubleLink = DoubleLink(self.episodeManager)

        # Terminate every episode after 40 steps.
        terminationSampler = StepBasedEpisodeTerminationSampler(
            self.episodeManager, 'steps', 40)
        self.sampler.stepSampler.setIsActiveSampler(terminationSampler)

        stateSampler = InitialStateSamplerStandard(self.episodeManager)
        actionReward = DummyActionAndReward(
            self.episodeManager.subDataManager, 2, True)

        self.sampler.setTransitionFunction(doubleLink.getExpectedNextState)
        self.sampler.setInitStateSampler(stateSampler.sampleInitState)
        self.sampler.setActionPolicy(actionReward.sampleAction)
        self.sampler.setRewardFunction(actionReward.sampleReward)
        # Reward and return both come from the same dummy provider.
        self.sampler.setReturnFunction(actionReward.sampleReward)
Example #5
0
    def setUp(self):
        """Configure a MountainCar sampler: 3 samples, 20 steps per episode."""
        settings = SettingsManager.getDefaultSettings()
        settings.setProperty('numTimeSteps', 20)

        self.sampler = EpisodeWithStepsSampler(samplerNameSteps='steps')
        self.sampler.numSamples = 3
        self.dataManager = self.sampler.getEpisodeDataManager()

        # Episodes end after a fixed 20 time steps.
        termination = NumStepsTerminationFunction(
            self.dataManager, None, numTimeSteps=20)
        self.sampler.stepSampler.setTerminationFunction(termination)

        environment = MountainCar(self.dataManager)
        self.sampler.setContextSampler(environment.sampleContext)
        self.sampler.setActionPolicy(environment.sampleAction)
        self.sampler.setTransitionFunction(environment.transitionFunction)
        self.sampler.setRewardFunction(environment.sampleReward)
        self.sampler.setInitStateSampler(environment.sampleInitState)
Example #6
0
    @Mapping.MappingMethod()
    def getAction(self, states):
        """Return twice the states, multiplied elementwise against a
        length-2 ones vector (numpy broadcasting)."""
        doubled = states * 2
        return doubled * np.ones(2)


class TestReward(Mapping):
    """Mapping from ('states', 'actions') to 'rewards': the reward is twice
    the first state component."""

    def __init__(self, dataManager):
        Mapping.__init__(self, dataManager, ['states', 'actions'], ['rewards'])

    @Mapping.MappingMethod()
    def getReward(self, states, actions):
        # Slice keeps the column dimension (shape (n, 1) rather than (n,)).
        firstComponent = states[:, 0:1]
        return firstComponent * 2


# Build a sampler, wire up the test environment, policy and reward,
# then run a single sampling pass over a freshly created data object.
sampler = EpisodeWithStepsSampler()
dataManager = sampler.getEpisodeDataManager()

environment = TestEnvironment(dataManager)
policy = TestPolicy(dataManager)
reward = TestReward(dataManager)

sampler.setInitStateSampler(environment.initState)
sampler.setTransitionFunction(environment)
sampler.setActionPolicy(policy)
sampler.setRewardFunction(reward)

# presumably 1 episode with up to 20 steps — confirm against createDataObject
data = dataManager.createDataObject([1, 20])

# Stream the data through the sampler and write the results back into it.
data[...] >> sampler >> data
Example #7
0
class Test(unittest.TestCase):
    """Exercises parallel sampling with the MountainCar environment."""

    def setUp(self):
        cfg = SettingsManager.getDefaultSettings()
        cfg.setProperty('numTimeSteps', 20)

        self.sampler = EpisodeWithStepsSampler(samplerNameSteps='steps')
        self.sampler.numSamples = 3
        self.dataManager = self.sampler.getEpisodeDataManager()

        # Episodes end after a fixed 20 time steps.
        termination = NumStepsTerminationFunction(
            self.dataManager, None, numTimeSteps=20)
        self.sampler.stepSampler.setTerminationFunction(termination)

        env = MountainCar(self.dataManager)
        self.sampler.setContextSampler(env.sampleContext)
        self.sampler.setActionPolicy(env.sampleAction)
        self.sampler.setTransitionFunction(env.transitionFunction)
        self.sampler.setRewardFunction(env.sampleReward)
        self.sampler.setInitStateSampler(env.sampleInitState)

    def test_sampleData(self):
        self.sampler.setParallelSampling(True)
        newData = self.dataManager.createDataObject([3, 20])
        self.sampler >> newData[...]
        print('States:', newData[1, :].states)
        print('NextStates:', newData[1, :].nextStates)
Example #8
0
 def setUp(self):
     """Create a step data manager and an episode/step sampler for testing."""
     cfg = SettingsManager.getDefaultSettings()
     cfg.setProperty('numTimeSteps', 40)
     self.dataManager = DataUtil.createTestManagerSteps()
     self.stepSamplerEpisodes = EpisodeWithStepsSampler(
         self.dataManager, 'episodes', 'steps')
Example #9
0
    def setUp(self):
        """Instantiate the sampler, its episode data manager, and a
        TransitionFunction built with arguments 2 and 1 (presumably state
        and action dimensions — confirm against TransitionFunction)."""
        self.sampler = EpisodeWithStepsSampler()
        self.dataManager = self.sampler.getEpisodeDataManager()
        self.tf = TransitionFunction(self.dataManager, 2, 1)
Example #10
0
class Test(unittest.TestCase):
    """Samples Pendulum episodes in parallel and checks the data shapes."""

    def setUp(self):
        defaultSettings = SettingsManager.getDefaultSettings()
        defaultSettings.setProperty('noiseStd', 1.0)
        # Uniform initial-state distribution over [pi - pi, pi + pi] x [-2, 2]
        defaultSettings.setProperty('initialStateDistributionMinRange',
                                    np.asarray([np.pi - np.pi, -2]))
        defaultSettings.setProperty('initialStateDistributionMaxRange',
                                    np.asarray([np.pi + np.pi, 2]))
        defaultSettings.setProperty('initialStateDistributionType', 'Uniform')
        defaultSettings.setProperty('dt', 0.025)
        defaultSettings.setProperty('initSigmaActions', 1.0)
        # NOTE(review): the original code set the min/max range properties a
        # second time with byte-identical values; the redundant calls were
        # removed.

        self.sampler = EpisodeWithStepsSampler()
        self.episodeManager = self.sampler.getEpisodeDataManager()
        self.stepManager = self.episodeManager.subDataManager
        self.pendulum = Pendulum(self.episodeManager)

        # Episodes are terminated after a fixed number of steps (40).
        self.sampler.stepSampler.setIsActiveSampler(
            StepBasedEpisodeTerminationSampler(self.episodeManager,
                                               'bla',
                                               numTimeSteps=40))

        initialStateSampler = InitialStateSamplerStandard(self.episodeManager)

        dummyActionAndReward = DummyActionAndReward(
            self.episodeManager.subDataManager, 1)

        self.sampler.setTransitionFunction(self.pendulum.getExpectedNextState)
        self.sampler.setInitStateSampler(initialStateSampler.sampleInitState)
        self.sampler.setActionPolicy(dummyActionAndReward.sampleAction)
        self.sampler.setRewardFunction(dummyActionAndReward.sampleReward)
        # Reward and return both come from the same dummy provider.
        self.sampler.setReturnFunction(dummyActionAndReward.sampleReward)

    def testGenerating(self):
        data = self.episodeManager.getDataObject(10)
        self.sampler.numSamples = 100
        self.sampler.setParallelSampling(True)
        data >> self.sampler
        # 100 episodes of 40 steps each, with a 2-dimensional state.
        self.assertEqual(data[:, 1].states.shape, (100, 2))
        self.assertEqual(data[1, :].states.shape, (40, 2))
Example #11
0
class Test(unittest.TestCase):
    """Samples DoubleLink episodes in parallel and verifies the state shapes."""

    def setUp(self):
        self.sampler = EpisodeWithStepsSampler()
        self.episodeManager = self.sampler.getEpisodeDataManager()
        doubleLink = DoubleLink(self.episodeManager)

        # Terminate every episode after 40 steps.
        terminationSampler = StepBasedEpisodeTerminationSampler(
            self.episodeManager, 'steps', 40)
        self.sampler.stepSampler.setIsActiveSampler(terminationSampler)

        stateSampler = InitialStateSamplerStandard(self.episodeManager)
        actionReward = DummyActionAndReward(
            self.episodeManager.subDataManager, 2, True)

        self.sampler.setTransitionFunction(doubleLink.getExpectedNextState)
        self.sampler.setInitStateSampler(stateSampler.sampleInitState)
        self.sampler.setActionPolicy(actionReward.sampleAction)
        self.sampler.setRewardFunction(actionReward.sampleReward)
        self.sampler.setReturnFunction(actionReward.sampleReward)

    def testGenerating(self):
        data = self.episodeManager.getDataObject(10)
        self.sampler.numSamples = 100
        self.sampler.setParallelSampling(True)
        data[...] >> self.sampler
        # 100 episodes of 40 steps each, with a 4-dimensional state.
        self.assertEqual(data[:, 1].states.shape, (100, 4))
        self.assertEqual(data[1, :].states.shape, (40, 4))
# Configure simulation settings: time step and (effectively zero) action noise.
defaultSettings.setProperty('dt', 0.025)
defaultSettings.setProperty('initSigmaActions', 1e-10)

# ... to sample the initial states ...
defaultSettings.setProperty('initialStateDistributionType', 'Uniform')
defaultSettings.setProperty('numTimeSteps', steps_per_epoch)

# ... and for the image creating preprocessor. Note that the same settings
# object is used here; the settings manager distributes it to all its clients
# (the Pendulum, the InitialStateSampler, and the Preprocessor are all
# settings clients).
img_size = 48  # pixels, height and width. (currently only squared images possible)
defaultSettings.setProperty('imgSize', img_size)
defaultSettings.setProperty('lineWidth', 3)  # again pixel

# Next we can create a "Sampler". It will later sample the values from the desired environment
sampler = EpisodeWithStepsSampler()

# We also need the data managers for the episode and step layer - we get them from the sampler
episodeManager = sampler.getEpisodeDataManager()

# Finally, we can create our environment. Try exchanging the DoubleLink with the QuadLink or the Pendulum...
n_link_pendulum = Pendulum(episodeManager)
# ... make sure you change the number of joints too
number_of_joints = 1

# We can still add further settings, e.g. for the initialStateSampler created next.
# The [min, max] pair is tiled once per joint.
defaultSettings.setProperty(
    'initialStateDistributionMinRange',
    np.tile(np.asarray([np.pi - np.pi, -2]), [number_of_joints]))
defaultSettings.setProperty(
    'initialStateDistributionMaxRange',