Example #1
def _create_preprocess(self):
    policy = Sequential(self.task)
    # Network preprocessing.
    # policy.add(Skip, self.config.frameskip)
    # policy.add(Maximum, 2)
    # policy.add(Grayscale)
    # # policy.add(Subsample, (2, 2))
    # # policy.add(Delta)
    # policy.add(History, 3)
    # policy.add(ClampReward)
    # policy.add(EpsilonGreedy, from_=0.5, to=0.5, test=0.5)
    policy.add(Normalize)
    return policy
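
All of these examples revolve around the same composition idea: a Sequential container owns a task and forwards experience through the steps added to it, in order. Below is a minimal self-contained sketch of that pattern; the class name, the way steps are constructed, and the observe method are guesses from the call style above, not the library's implementation.

class SequentialSketch:
    """Toy stand-in for Sequential; illustrates step chaining only."""

    def __init__(self, task):
        self.task = task
        self._steps = []

    def add(self, step, *args, **kwargs):
        # Mirror the policy.add(Step, arg, ...) call style: classes are
        # instantiated with the current task, instances are used directly.
        if isinstance(step, type):
            step = step(self.task, *args, **kwargs)
        self._steps.append(step)

    def observe(self, observation):
        # Feed the observation through every step in order.
        for step in self._steps:
            observation = step.observe(observation)
        return observation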
Example #2
def policy(task, step):
    policy = Sequential(task)
    print('Step:  ', step.__name__)
    print('Input: ', policy.task.observs)
    policy.add(step)
    print('Output:', policy.task.actions)
    policy.add(Random)
    return policy
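
This factory is a debugging helper: it prints the inspected step's name, the observation space the policy receives, and the action space it exposes once the step is added, then appends a Random step so the policy stays executable. A hedged usage sketch follows; choosing Grayscale as the inspected step is arbitrary, and the printed values are placeholders.

inspect = policy(task, Grayscale)  # 'task' as in the surrounding examples
# Prints, per the function body:
# Step:   Grayscale
# Input:  <observation space before the step>
# Output: <action space after the step>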
Example #3
def train_policies(self):
    trainers = []
    for _ in range(self.config.learners):
        # Give each learner its own config copy and its own single-threaded
        # model, initialized from the central model's weights.
        config = AttrDict(self.config.copy())
        model = Model(self._create_network, threads=1)
        model.weights = self.model.weights
        policy = Sequential(self.task)
        policy.add(self._create_preprocess())
        policy.add(Train, config, self, model)
        trainers.append(policy)
    return trainers
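
The per-learner AttrDict(self.config.copy()) matters: each trainer can adjust its own settings without touching the shared config. Below is a self-contained sketch of that copy-per-learner pattern, assuming AttrDict behaves like a dict with attribute access; the real class may differ.

class AttrDict(dict):
    # Minimal stand-in: dictionary entries readable/writable as attributes.
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__

shared = AttrDict(learning_rate=1e-4, learners=4)
configs = [AttrDict(shared.copy()) for _ in range(shared.learners)]
configs[0].learning_rate = 2e-4      # per-learner tweak...
assert shared.learning_rate == 1e-4  # ...leaves the shared config intact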
Example #4
def test_policy(self):
    policy = Sequential(self.task)
    policy.add(self._preprocess)
    policy.add(Test, self.model)
    return policy
Example #5
def _create_preprocess(self):
    policy = Sequential(self.task)
    if self.config.noop_max:
        policy.add(RandomStart, self.config.noop_max)
    if self.config.frame_skip:
        policy.add(Skip, self.config.frame_skip)
    if self.config.frame_max:
        policy.add(Maximum, self.config.frame_max)
    if self.config.history:
        policy.add(Grayscale)
    if self.config.subsample > 1:
        sub = self.config.subsample
        # Keep the channel axis intact when frames were not grayscaled.
        amount = (sub, sub) if self.config.history else (sub, sub, 1)
        policy.add(Subsample, amount)
    if self.config.delta:
        policy.add(Delta)
    if self.config.history:
        policy.add(History, self.config.history)
    policy.add(ClampReward)
    policy.add(Normalize)
    return policy
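
To make the branching concrete, here is a hypothetical Atari-style configuration and the pipeline it would assemble; the values are illustrative, not defaults taken from the source, and the AttrDict stand-in from the sketch after Example #3 is reused.

config = AttrDict(noop_max=30, frame_skip=4, frame_max=2,
                  subsample=2, delta=False, history=4)
# Step order produced by the branches above for this config:
#   RandomStart(30) -> Skip(4) -> Maximum(2) -> Grayscale
#   -> Subsample((2, 2)) -> History(4) -> ClampReward -> Normalize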
Example #6
def _create_preprocess(self):
    policy = Sequential(self.task)
    policy.add(Image)
    if self.config.noop_max:
        policy.add(RandomStart, self.config.noop_max)
    if self.config.frame_skip > 1:
        policy.add(Skip, self.config.frame_skip)
    if self.config.frame_max:
        policy.add(Maximum, self.config.frame_max)
    if self.config.history > 1:
        # ITU-R BT.601 luma weights, truncated to the input's channel count.
        channels = policy.above_task.observs.shape[-1]
        policy.add(Grayscale, (0.299, 0.587, 0.114)[:channels])
    if self.config.subsample > 1:
        sub = self.config.subsample
        amount = (sub, sub) if self.config.history > 1 else (sub, sub, 1)
        policy.add(Subsample, amount)
    if self.config.delta:
        policy.add(Delta)
    if self.config.history > 1:
        policy.add(History, self.config.history)
    policy.add(Normalize)
    policy.add(ClampReward)
    policy.add(EpsilonGreedy, **self.config.epsilon)
    return policy
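
Unlike Example #5, this variant normalizes before clamping rewards and ends with an exploration step. The **self.config.epsilon expansion implies the schedule is stored as a mapping; judging from Example #1, its keys include from_, to, and test. A hypothetical value follows; the decaying numbers are an assumption, since the source only shows constants.

config.epsilon = dict(from_=1.0, to=0.1, test=0.05)  # keys from Example #1
policy.add(EpsilonGreedy, **config.epsilon)
# equivalent to: policy.add(EpsilonGreedy, from_=1.0, to=0.1, test=0.05)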
Example #7
def policy(self):
    # TODO: Why doesn't self.task work here?
    policy = Sequential(self._preprocess.task)
    policy.add(self._preprocess)
    policy.add(self)
    return policy
Example #8
def policy(self):
    policy = Sequential(self._preprocess.task)
    policy.add(self._preprocess)
    policy.add(self)
    return policy
Example #9
def _prepend_score_step(self, policy):
    # Wrap the given policy in a new container that runs Score first.
    combined = Sequential(policy.task)
    combined.add(Score)
    combined.add(policy)
    return combined
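
A hedged usage sketch of the wrapper; the call site and the agent name are hypothetical, and test_policy is borrowed from Example #4.

scored = agent._prepend_score_step(agent.test_policy())
# 'scored' runs the Score step before delegating to the original policy.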