def act(self, stateObs, _):
    """Pick an action index for the current observation.

    The observation is passed through ``transformFrame`` with ``x=110`` and
    ``y=110`` and the result is given to the module-level ``model``'s
    ``predict``. The second positional argument is accepted but never read.

    Returns:
        The first element of the ``predict`` result, or ``0`` when that
        value is 4 or greater.
    """
    frame = transformFrame(stateObs, x=110, y=110)
    chosen, _state = model.predict(frame)

    # Any predicted index of 4 or above is replaced by action 0.
    return 0 if chosen >= 4 else chosen
 def reset(self):
     """Reset every remote environment and return the stacked first frames.

     A ``('reset', None)`` message is sent to each remote in
     ``self.vecenv.remotes`` before any reply is read. Each reply is then
     passed through ``transformFrame`` with ``x=self.x`` and ``y=self.y``.

     Note: this doesn't work if each environment has a different size.

     Returns:
         The ``np.stack`` of the transformed frames, one entry per remote.
     """
     workers = self.vecenv.remotes

     # Dispatch all reset requests first, then collect the replies in order.
     for worker in workers:
         worker.send(('reset', None))
     first_frames = [worker.recv() for worker in workers]

     return np.stack(
         [transformFrame(raw, x=self.x, y=self.y) for raw in first_frames]
     )
    def act(self, stateObs, actions):
        """Pick an action index for the current observation.

        Prints ``actions``, passes the observation through ``transformFrame``
        with ``x=110`` and ``y=110``, and gives the result to the
        module-level ``model``'s ``predict``.

        Returns:
            The first element of the ``predict`` result, or ``0`` when that
            value is not smaller than ``len(actions)``.
        """
        print(actions)

        frame = transformFrame(stateObs, x=110, y=110)
        chosen, _state = model.predict(frame)

        # An index beyond the supplied action list falls back to action 0.
        if chosen >= len(actions):
            return 0
        return chosen
    def step_wait(self):
        """Collect one step result from every remote and return them stacked.

        For each remote in ``self.vecenv.remotes`` this reads an
        ``(ob, rew, done, info)`` tuple, then sends an ``env_method`` request
        for ``__str__`` to the same remote and reads the reply (presumably
        the environment's string form -- confirm against the worker). That
        reply is handed to ``normalizeReward`` together with any non-zero
        reward. Observations go through ``transformFrame`` with
        ``x=self.x`` and ``y=self.y``.

        ``self.vecenv.waiting`` is set to ``False`` once all remotes have
        been processed.

        Returns:
            A 4-tuple ``(observations, rewards, dones, infos)``, each built
            with ``np.stack`` over the per-remote values.
        """
        frames = []
        rewards = []
        finished = []
        extras = []

        for worker in self.vecenv.remotes:
            ob, rew, done, info = worker.recv()

            # Second round-trip to the same worker; the reply is what
            # normalizeReward receives alongside the reward.
            worker.send(('env_method', ('__str__', {}, {})))
            env_label = worker.recv()

            # Zero rewards are stored untouched; no need to normalize them.
            rewards.append(rew if rew == 0 else normalizeReward(rew, env_label))

            frames.append(transformFrame(ob, x=self.x, y=self.y))
            finished.append(done)
            extras.append(info)

        self.vecenv.waiting = False
        return tuple(
            np.stack(column) for column in (frames, rewards, finished, extras)
        )
Beispiel #5
0
 def act(self, stateObs, _):
     """Pick an action index for the current observation.

     The observation is passed through ``transformFrame`` with ``x=300`` and
     ``y=300`` and the result is given to the module-level ``model``'s
     ``predict``. The second positional argument is accepted but never read.

     Returns:
         The first element of the ``predict`` result, unmodified.
     """
     frame = transformFrame(stateObs, x=300, y=300)
     chosen, _state = model.predict(frame)
     return chosen