예제 #1
0
파일: agent.py 프로젝트: ntrntr/ua-ros-pkg
    def train(self, episodes, maxSteps):
        """Train a policy network on the InfoMax task and save its weights.

        Creates a new InfoMaxEnv / InfoMaxTask pair and a new network, wraps
        the network in an OptimizationAgent (PGPE or CMAES, chosen by the
        ``self._PGPE`` / ``self._CMAES`` flags), runs ``episodes`` episodes
        one at a time while printing each episode's total reward, prints the
        average reward, and finally calls ``self._saveWeights()``.

        Args:
            episodes: number of training episodes to run. With a value of 0
                no episode is run, the reported average is 0.0, and the
                (untrained) network is still passed to ``_saveWeights()``.
            maxSteps: forwarded to InfoMaxTask as ``maxSteps``.
        """
        total_reward = 0

        # set up environment and task
        self.env = InfoMaxEnv(self.objectNames, self.actionNames,
                              self.numCategories)
        self.task = InfoMaxTask(self.env, maxSteps=maxSteps,
                                do_decay_beliefs=True,
                                uniformInitialBeliefs=True)

        # create neural net and learning agent; the task's outdim and indim
        # are handed to buildNetwork as its first and last layer sizes
        self.params = buildNetwork(self.task.outdim, self.task.indim,
                                   bias=True, outclass=SoftmaxLayer)

        # NOTE(review): if neither flag is set, self.agent is not assigned
        # here and whatever value it already has (if any) is used below --
        # confirm that the constructor always sets one of the two flags.
        if self._PGPE:
            self.agent = OptimizationAgent(
                self.params, PGPE(minimize=False, verbose=False))
        elif self._CMAES:
            self.agent = OptimizationAgent(
                self.params, CMAES(minimize=False, verbose=False))

        # init and perform experiment
        exp = EpisodicExperiment(self.task, self.agent)

        for i in range(episodes):
            exp.doEpisodes(1)
            # read the episode's reward once and reuse it for sum and report
            episode_reward = self.task.getTotalReward()
            total_reward += episode_reward
            # two spaces after "episode" reproduce the output of the former
            # `print "reward episode ", i, ...` statement
            print("reward episode  %s %s" % (i, episode_reward))

        # report the average; float division avoids integer truncation and
        # the guard avoids a ZeroDivisionError when no episode was requested
        if episodes > 0:
            average_reward = total_reward / float(episodes)
        else:
            average_reward = 0.0
        print("\naverage reward over training =  %s" % (average_reward,))

        # save trained network
        self._saveWeights()
예제 #2
0
파일: agent.py 프로젝트: ntrntr/ua-ros-pkg
    def run(self, maxSteps):
        """Run the InfoMax task once using the network stored on disk.

        Creates a new InfoMaxEnv / InfoMaxTask pair, resets the task, loads
        the pickled network from ``infomaxNet.pkl`` in the current working
        directory, and then loops until ``self.task.isFinished()``: the
        current observation is fed to the network, the resulting action
        vector is performed on the task, and the observations before and
        after the action plus the step reward are printed. The total reward
        is printed at the end.

        Args:
            maxSteps: forwarded to InfoMaxTask as ``maxSteps``.
        """
        self.env = InfoMaxEnv(self.objectNames, self.actionNames,
                              self.numCategories)
        self.task = InfoMaxTask(self.env, maxSteps=maxSteps,
                                do_decay_beliefs=True,
                                uniformInitialBeliefs=True)
        self.task.reset()

        # load network if we're just running, not training.
        # The file is closed as soon as it has been read. Binary mode is what
        # pickle expects; unpickling can execute arbitrary code, so this file
        # must come from a trusted source (presumably a previous training
        # run -- verify against _saveWeights).
        with open('infomaxNet.pkl', 'rb') as net_file:
            self.params = pickle.load(net_file)
        # clear the sorted flag and re-run sortModules() on the loaded
        # network before it is activated
        self.params.sorted = False
        self.params.sortModules()

        print("\n")
        while not self.task.isFinished():

            # get initial observation of environment
            obs_pre = self.task.getObservation()

            # self.task.showBeliefs() is an alternative, formatted view
            print("State pre")
            print(self.task.getObservation())

            # send observation to net for an action vector
            action = self.params.activate(obs_pre)

            # send action vector to robot
            self.task.performAction(action)

            print("State post")
            print(self.task.getObservation())

            # calculate and show reward
            print("reward %s" % (self.task.getReward(),))
            print("\n")

        print("total reward = %s" % (self.task.getTotalReward(),))
        print("\n")