Ejemplo n.º 1
0
class Test(unittest.TestCase):
    """Samples MountainCar data through an EpisodeWithStepsSampler."""

    def setUp(self):
        # Build the sampler, its episode data manager and the environment,
        # then wire the environment callbacks into the sampler.
        stepsPerEpisode = 20

        settings = SettingsManager.getDefaultSettings()
        settings.setProperty('numTimeSteps', stepsPerEpisode)

        self.sampler = EpisodeWithStepsSampler(samplerNameSteps='steps')
        self.sampler.numSamples = 3
        self.dataManager = self.sampler.getEpisodeDataManager()

        # Stop sampling after a fixed number of steps.
        stepSampler = self.sampler.stepSampler
        termination = NumStepsTerminationFunction(
            self.dataManager, None, numTimeSteps=stepsPerEpisode)
        stepSampler.setTerminationFunction(termination)

        mountainCar = MountainCar(self.dataManager)

        self.sampler.setContextSampler(mountainCar.sampleContext)
        self.sampler.setActionPolicy(mountainCar.sampleAction)
        self.sampler.setTransitionFunction(mountainCar.transitionFunction)
        self.sampler.setRewardFunction(mountainCar.sampleReward)
        self.sampler.setInitStateSampler(mountainCar.sampleInitState)

    def test_sampleData(self):
        # Sample with parallel sampling enabled and print the 'states' and
        # 'nextStates' entries selected by index [1, :]. Nothing is asserted.
        self.sampler.setParallelSampling(True)
        sampledData = self.dataManager.createDataObject([3, 20])
        self.sampler >> sampledData[...]
        print('States:', sampledData[1, :].states)
        print('NextStates:', sampledData[1, :].nextStates)
Ejemplo n.º 2
0
class Test(unittest.TestCase):
    """Checks the step-level data entries of an EpisodeWithStepsSampler."""

    def setUp(self):
        # Fresh sampler, its episode data manager, and a transition function
        # built on that manager.
        sampler = EpisodeWithStepsSampler()
        self.sampler = sampler
        self.dataManager = sampler.getEpisodeDataManager()

        # NOTE(review): 2 and 1 are presumably dimensions - confirm against
        # the TransitionFunction signature.
        self.tf = TransitionFunction(self.dataManager, 2, 1)

    def test_init(self):
        # The sub data manager of the episode manager must list both
        # 'states' and 'actions' among its data entries.
        stepLevelManager = self.sampler.getEpisodeDataManager().subDataManager
        for entryName in ('states', 'actions'):
            self.assertIn(entryName, stepLevelManager.dataEntries)
        # NOTE: the equivalent check for 'nextStates' is currently disabled.

        # Looked up but not asserted on; the access itself must succeed.
        activeStepSampler = self.sampler.stepSampler
Ejemplo n.º 3
0
class Test(unittest.TestCase):
    """Generates DoubleLink data and checks the shapes of sampled states."""

    def setUp(self):
        # Sampler and its episode-level data manager.
        self.sampler = EpisodeWithStepsSampler()
        self.episodeManager = self.sampler.getEpisodeDataManager()
        manager = self.episodeManager

        doubleLink = DoubleLink(manager)

        # Is-active sampler configured with the name 'steps' and the value 40.
        stepSampler = self.sampler.stepSampler
        terminationSampler = StepBasedEpisodeTerminationSampler(
            manager, 'steps', 40)
        stepSampler.setIsActiveSampler(terminationSampler)

        initStateSampler = InitialStateSamplerStandard(manager)
        actionRewardStub = DummyActionAndReward(
            manager.subDataManager, 2, True)

        # The same stub method serves as both reward and return function.
        self.sampler.setTransitionFunction(doubleLink.getExpectedNextState)
        self.sampler.setInitStateSampler(initStateSampler.sampleInitState)
        self.sampler.setActionPolicy(actionRewardStub.sampleAction)
        self.sampler.setRewardFunction(actionRewardStub.sampleReward)
        self.sampler.setReturnFunction(actionRewardStub.sampleReward)

    def testGenerating(self):
        # Run the sampler with numSamples = 100 and compare the state shapes
        # along both index directions with the expected tuples.
        sampled = self.episodeManager.getDataObject(10)
        self.sampler.numSamples = 100
        self.sampler.setParallelSampling(True)
        sampled[...] >> self.sampler

        acrossFirstIndex = sampled[:, 1].states.shape
        self.assertEqual(acrossFirstIndex, (100, 4))
        acrossSecondIndex = sampled[1, :].states.shape
        self.assertEqual(acrossSecondIndex, (40, 4))
Ejemplo n.º 4
0
class Test(unittest.TestCase):
    """Generates Pendulum data and checks the shapes of sampled states."""

    def setUp(self):
        # Settings are written before the sampler and environment are
        # constructed. Each property is set exactly once.
        defaultSettings = SettingsManager.getDefaultSettings()
        defaultSettings.setProperty('noiseStd', 1.0)
        # Initial-state range written as pi -/+ pi for the first component
        # (i.e. 0 .. 2*pi) and -2 .. 2 for the second.
        defaultSettings.setProperty('initialStateDistributionMinRange',
                                    np.asarray([np.pi - np.pi, -2]))
        defaultSettings.setProperty('initialStateDistributionMaxRange',
                                    np.asarray([np.pi + np.pi, 2]))
        defaultSettings.setProperty('initialStateDistributionType', 'Uniform')
        defaultSettings.setProperty('dt', 0.025)
        defaultSettings.setProperty('initSigmaActions', 1.0)

        self.sampler = EpisodeWithStepsSampler()
        self.episodeManager = self.sampler.getEpisodeDataManager()
        self.stepManager = self.episodeManager.subDataManager
        self.pendulum = Pendulum(self.episodeManager)

        self.sampler.stepSampler.setIsActiveSampler(
            StepBasedEpisodeTerminationSampler(self.episodeManager,
                                               'bla',
                                               numTimeSteps=40))

        initialStateSampler = InitialStateSamplerStandard(self.episodeManager)

        # Reuse the step-level manager stored above instead of re-reading it.
        dummyActionAndReward = DummyActionAndReward(self.stepManager, 1)

        # The same stub method serves as both reward and return function.
        self.sampler.setTransitionFunction(self.pendulum.getExpectedNextState)
        self.sampler.setInitStateSampler(initialStateSampler.sampleInitState)
        self.sampler.setActionPolicy(dummyActionAndReward.sampleAction)
        self.sampler.setRewardFunction(dummyActionAndReward.sampleReward)
        self.sampler.setReturnFunction(dummyActionAndReward.sampleReward)

    def testGenerating(self):
        # Run the sampler with numSamples = 100 and compare the state shapes
        # along both index directions with the expected tuples.
        data = self.episodeManager.getDataObject(10)
        self.sampler.numSamples = 100
        self.sampler.setParallelSampling(True)
        data >> self.sampler
        self.assertEqual(data[:, 1].states.shape, (100, 2))
        self.assertEqual(data[1, :].states.shape, (40, 2))
Ejemplo n.º 5
0
    @Mapping.MappingMethod()
    def getAction(self, states):
        # Double the states, then multiply by a length-2 vector of ones.
        doubledStates = states * 2
        return doubledStates * np.ones(2)


class TestReward(Mapping):
    """Mapping constructed with ['states', 'actions'] and ['rewards']."""

    def __init__(self, dataManager):
        # NOTE(review): the two lists are presumably the input and output
        # entry names of the mapping - confirm against Mapping.__init__.
        firstNames = ['states', 'actions']
        secondNames = ['rewards']
        Mapping.__init__(self, dataManager, firstNames, secondNames)

    @Mapping.MappingMethod()
    def getReward(self, states, actions):
        # Twice the first column of states (kept 2-D by the 0:1 slice);
        # actions is accepted but not used.
        firstColumn = states[:, 0:1]
        return firstColumn * 2


# Build the sampler and fetch its episode-level data manager.
sampler = EpisodeWithStepsSampler()
dataManager = sampler.getEpisodeDataManager()

# Instantiate the three test components on the shared data manager.
environment = TestEnvironment(dataManager)
policy = TestPolicy(dataManager)
reward = TestReward(dataManager)

# Wire the components into the sampler. The environment provides both the
# initial-state sampler (its initState member) and the transition function
# (the environment object itself).
sampler.setInitStateSampler(environment.initState)
sampler.setTransitionFunction(environment)
sampler.setActionPolicy(policy)
sampler.setRewardFunction(reward)

# NOTE(review): [1, 20] presumably means 1 episode of 20 steps - confirm.
data = dataManager.createDataObject([1, 20])

# Evaluated left to right: `data[...] >> sampler` first, then the result of
# that expression is shifted into `data`.
data[...] >> sampler >> data

# NOTE(review): the second argument presumably selects an index - confirm
# against getDataEntry.
states = data.getDataEntry('states', 1)