def train(self, episodes, maxSteps):
    avgReward = 0

    # set up environment and task
    self.env = InfoMaxEnv(self.objectNames, self.actionNames, self.numCategories)
    self.task = InfoMaxTask(self.env, maxSteps=maxSteps,
                            do_decay_beliefs=True, uniformInitialBeliefs=True)

    # create neural net and learning agent
    # (PyBrain convention: the task's outdim is the observation size fed into
    # the net, and its indim is the action size the net must produce)
    self.params = buildNetwork(self.task.outdim, self.task.indim,
                               bias=True, outclass=SoftmaxLayer)
    if self._PGPE:
        self.agent = OptimizationAgent(self.params, PGPE(minimize=False, verbose=False))
    elif self._CMAES:
        self.agent = OptimizationAgent(self.params, CMAES(minimize=False, verbose=False))

    # initialize and perform experiment
    exp = EpisodicExperiment(self.task, self.agent)
    for i in range(episodes):
        exp.doEpisodes(1)
        avgReward += self.task.getTotalReward()
        print "reward episode ", i, self.task.getTotalReward()

    # print summary info
    print "\naverage reward over training = ", avgReward / episodes

    # save trained network
    self._saveWeights()
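# train() ends by calling self._saveWeights(), which is defined elsewhere in
# this class. A minimal sketch of what it might do, assuming it simply pickles
# the trained network to the 'infomaxNet.pkl' file that run() loads (the
# pickle module is assumed to be imported at module level):
def _saveWeights(self):
    # persist the trained network so run() can reload it later
    with open('infomaxNet.pkl', 'wb') as f:
        pickle.dump(self.params, f)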
def run(self, maxSteps):
    # set up environment and task
    self.env = InfoMaxEnv(self.objectNames, self.actionNames, self.numCategories)
    self.task = InfoMaxTask(self.env, maxSteps=maxSteps,
                            do_decay_beliefs=True, uniformInitialBeliefs=True)
    self.task.reset()

    # load network if we're just running, not training
    # (open in binary mode for pickle; sortModules() rebuilds the net's
    # internal ordering after unpickling)
    self.params = pickle.load(open('infomaxNet.pkl', 'rb'))
    self.params.sorted = False
    self.params.sortModules()

    print "\n"
    while not self.task.isFinished():
        # get initial observation of environment
        obs_pre = self.task.getObservation()
        print "State pre"
        #print self.task.showBeliefs()  # use formatted print-beliefs function
        print self.task.getObservation()

        # send observation to net for an action vector
        action = self.params.activate(obs_pre)

        # send action vector to robot
        self.task.performAction(action)

        print "State post"
        #print self.task.showBeliefs()
        print self.task.getObservation()

        # calculate and show reward
        print "reward", self.task.getReward()
        print "\n"

    print "total reward =", self.task.getTotalReward()
    print "\n"
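# A usage sketch showing how these two methods might be driven from a script.
# The class name (InfoMaxAgent), the constructor signature, and the object and
# action labels are all assumptions for illustration; they are not shown in
# this section.
if __name__ == '__main__':
    objectNames = ['obj0', 'obj1']           # hypothetical object labels
    actionNames = ['grasp', 'lift', 'drop']  # hypothetical action labels
    agent = InfoMaxAgent(objectNames, actionNames, numCategories=2)  # assumed ctor
    agent.train(episodes=100, maxSteps=30)   # learn a policy and save the net
    agent.run(maxSteps=30)                   # replay the saved policy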