Example #1
class InfoMaxAgent():

	def __init__(self, option):

		# examine command-line argument for learning algorithm
		if option[1] == "PGPE":
			self._PGPE = True; self._CMAES = False	
		elif option[1] == "CMAES":
			self._PGPE = False; self._CMAES = True
		else:	# default to PGPE; make sure both flags are always defined
			self._PGPE = True; self._CMAES = False

		# tuples are name, category
		self.objectNames = [('pink_glass',0),           # 0
                            ('german_ball',1),          # 1
                            ('blue_cup',2),             # 2
                            ('blue_spiky_ball',3),      # 3
                            ('screw_box',4),            # 4
                            ('wire_spool',5),           # 5
                            ('sqeaky_ball',6),          # 6
                            ('duck_tape_roll',7),       # 7
                            ('ace_terminals',8),        # 8
                            ('chalkboard_eraser',9),    # 9
                           ]
		#self.actionNames = ["pick up", "drop", "push", "squeeze"]
		self.actionNames = ['grasp',        # 0
                            'lift',         # 1
                            'drop',         # 2
                            'shake_roll',   # 3
                            'place',        # 4
                            'push',         # 5
                            'shake_pitch',  # 6
                           ]

		# number of object categories (one per object here)
		self.numCategories = 10

		#self.actionNames = ["pick up", "drop", "push", "squeeze", "move left", "move right", "reset"]

	# run trained network in our environment
	def run(self, maxSteps):

		self.env = InfoMaxEnv(self.objectNames, self.actionNames, self.numCategories)
		self.task = InfoMaxTask(self.env, maxSteps=maxSteps, \
					do_decay_beliefs = True, uniformInitialBeliefs = True)
		self.task.reset()

		# load network if we're just running, not training
		self.params = pickle.load(open('infomaxNet.pkl'))
		self.params.sorted = False
		self.params.sortModules()

		print "\n"
		while not self.task.isFinished():
	
			# get initial observation of environment	
			obs_pre = self.task.getObservation()

			print "State pre"
			#print self.task.showBeliefs()		# use formatted print beliefs function
			print obs_pre

			# send observation to net for an action vector
			action = self.params.activate(obs_pre)	

			# send action vector to robot
			self.task.performAction(action) 

			print "State post"
			#print self.task.showBeliefs()
			print self.task.getObservation()

			# calculate and show reward
			print "reward",self.task.getReward()
			print "\n"

		print "total reward =",self.task.getTotalReward()
		print "\n" 

	# train a new network with PGPE or CMAES
	def train(self, episodes, maxSteps):
 	
		avgReward = 0

		# set up environment and task
		self.env = InfoMaxEnv(self.objectNames, self.actionNames, self.numCategories)
		self.task = InfoMaxTask(self.env, maxSteps=maxSteps, \
					do_decay_beliefs = True, uniformInitialBeliefs = True)

		# create neural net and learning agent
		self.params = buildNetwork(self.task.outdim, self.task.indim, \
						bias=True, outclass=SoftmaxLayer)

		if self._PGPE:
			self.agent = OptimizationAgent(self.params, PGPE(minimize=False,verbose=False))
		elif self._CMAES:
			self.agent = OptimizationAgent(self.params, CMAES(minimize=False,verbose=False))

		# init and perform experiment
		exp = EpisodicExperiment(self.task, self.agent)

		for i in range(episodes):        
			exp.doEpisodes(1)
			avgReward += self.task.getTotalReward()
			print "reward episode ",i,self.task.getTotalReward()

		# report the average reward over all training episodes
		print "\naverage reward over training = ",avgReward/episodes

		# save trained network
		self._saveWeights()

	# save and pickle trained weights for later reuse
	def _saveWeights(self):

		# import weights into network and save network

		if self._PGPE:
			for i in range(len(self.params.params)):
				self.params.params[i] = self.agent.learner.current[i]
			pickle.dump(self.params, open('infomaxNet.pkl','w'))

		elif self._CMAES:

			# the following code is adapted from the WWInfoMaxCMAES.py script from the ICDL 2010 paper
			learner = self.agent.learner

			# sample a batch of candidate parameter vectors around the current CMA-ES center
			arz = randn(learner.numParameters, learner.batchSize)
			arx = tile(learner.center.reshape(learner.numParameters, 1), (1, learner.batchSize)) \
				+ learner.stepSize * dot(dot(learner.B, learner.D), arz)

			# evaluate every candidate in the batch
			arfitness = zeros(learner.batchSize)
			for k in xrange(learner.batchSize):
				learner.wrappingEvaluable._setParameters(arx[:, k])
				arfitness[k] = learner._BlackBoxOptimizer__evaluator(learner.wrappingEvaluable)

			# sort candidates by fitness and pick the first-ranked parameter vector
			tmp = sorted(map(lambda (x, y): (y, x), enumerate(ravel(arfitness))))
			arfitness = array(map(lambda x: x[0], tmp))
			arindex = array(map(lambda x: int(x[1]), tmp))

			arz = arz[:, arindex]
			curparams = arx[:, arindex[0]]

			# copy the selected parameters into the network and save it
			for i in range(len(self.params.params)):
				self.params.params[i] = curparams[i]
			pickle.dump(self.params, open('infomaxNet.pkl','w'))
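
Neither example shows how the class is actually invoked. A minimal driver sketch, assuming the class above is importable (the module name infomax_agent and the episode/step counts below are hypothetical; the "PGPE"/"CMAES" strings match the option[1] check in __init__):

import sys
from infomax_agent import InfoMaxAgent   # hypothetical module name

if __name__ == '__main__':
    # e.g.:  python train_agent.py PGPE
    agent = InfoMaxAgent(sys.argv)           # sys.argv[1] selects PGPE or CMAES
    agent.train(episodes=100, maxSteps=30)   # trains and pickles infomaxNet.pkl
    agent.run(maxSteps=30)                   # reloads the pickle and replays one episode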
Example #2
class InfoMaxAgent():
    def __init__(self, option):

        # examine command-line argument for learning algorithm
        if option[1] == "PGPE":
            self._PGPE = True
            self._CMAES = False
        elif option[1] == "CMAES":
            self._PGPE = False
            self._CMAES = True
        else:
            # default to PGPE; make sure both flags are always defined
            self._PGPE = True
            self._CMAES = False

        # tuples are name, category
        self.objectNames = [
            ('pink_glass', 0),  # 0
            ('german_ball', 1),  # 1
            ('blue_cup', 2),  # 2
            ('blue_spiky_ball', 3),  # 3
            ('screw_box', 4),  # 4
            ('wire_spool', 5),  # 5
            ('sqeaky_ball', 6),  # 6
            ('duck_tape_roll', 7),  # 7
            ('ace_terminals', 8),  # 8
            ('chalkboard_eraser', 9),  # 9
        ]
        #self.actionNames = ["pick up", "drop", "push", "squeeze"]
        self.actionNames = [
            'grasp',  # 0
            'lift',  # 1
            'drop',  # 2
            'shake_roll',  # 3
            'place',  # 4
            'push',  # 5
            'shake_pitch',  # 6
        ]

        # number of object categories (one per object here)
        self.numCategories = 10

        #self.actionNames = ["pick up", "drop", "push", "squeeze", "move left", "move right", "reset"]

    # run trained network in our environment
    def run(self, maxSteps):

        self.env = InfoMaxEnv(self.objectNames, self.actionNames,
                              self.numCategories)
        self.task = InfoMaxTask(self.env, maxSteps=maxSteps,
                                do_decay_beliefs=True, uniformInitialBeliefs=True)
        self.task.reset()

        # load network if we're just running, not training
        self.params = pickle.load(open('infomaxNet.pkl'))
        self.params.sorted = False
        self.params.sortModules()

        print "\n"
        while not self.task.isFinished():

            # get initial observation of environment
            obs_pre = self.task.getObservation()

            print "State pre"
            #print self.task.showBeliefs()		# use formatted print beliefs function
            print obs_pre

            # send observation to net for an action vector
            action = self.params.activate(obs_pre)

            # send action vector to robot
            self.task.performAction(action)

            print "State post"
            #print self.task.showBeliefs()
            print self.task.getObservation()

            # calculate and show reward
            print "reward", self.task.getReward()
            print "\n"

        print "total reward =", self.task.getTotalReward()
        print "\n"

    # train a new network with PGPE or CMAES
    def train(self, episodes, maxSteps):

        avgReward = 0

        # set up environment and task
        self.env = InfoMaxEnv(self.objectNames, self.actionNames,
                              self.numCategories)
        self.task = InfoMaxTask(self.env, maxSteps=maxSteps,
                                do_decay_beliefs=True, uniformInitialBeliefs=True)

        # create neural net and learning agent
        self.params = buildNetwork(self.task.outdim, self.task.indim, \
            bias=True, outclass=SoftmaxLayer)

        if self._PGPE:
            self.agent = OptimizationAgent(self.params,
                                           PGPE(minimize=False, verbose=False))
        elif self._CMAES:
            self.agent = OptimizationAgent(
                self.params, CMAES(minimize=False, verbose=False))

        # init and perform experiment
        exp = EpisodicExperiment(self.task, self.agent)

        for i in range(episodes):
            exp.doEpisodes(1)
            avgReward += self.task.getTotalReward()
            print "reward episode ", i, self.task.getTotalReward()

        # report the average reward over all training episodes
        print "\naverage reward over training = ", avgReward / episodes

        # save trained network
        self._saveWeights()

    # save and pickle trained weights for later reuse
    def _saveWeights(self):

        # import weights into network and save network

        if self._PGPE:
            for i in range(len(self.params.params)):
                self.params.params[i] = self.agent.learner.current[i]
            pickle.dump(self.params, open('infomaxNet.pkl', 'w'))

        elif self._CMAES:

            # the following code is adapted from the WWInfoMaxCMAES.py script from the ICDL 2010 paper
            learner = self.agent.learner

            # sample a batch of candidate parameter vectors around the current CMA-ES center
            arz = randn(learner.numParameters, learner.batchSize)
            arx = (tile(learner.center.reshape(learner.numParameters, 1),
                        (1, learner.batchSize))
                   + learner.stepSize * dot(dot(learner.B, learner.D), arz))

            # evaluate every candidate in the batch
            arfitness = zeros(learner.batchSize)
            for k in xrange(learner.batchSize):
                learner.wrappingEvaluable._setParameters(arx[:, k])
                arfitness[k] = learner._BlackBoxOptimizer__evaluator(
                    learner.wrappingEvaluable)

            # sort candidates by fitness and pick the first-ranked parameter vector
            tmp = sorted(map(lambda (x, y): (y, x), enumerate(ravel(arfitness))))
            arfitness = array(map(lambda x: x[0], tmp))
            arindex = array(map(lambda x: int(x[1]), tmp))

            arz = arz[:, arindex]
            curparams = arx[:, arindex[0]]

            # copy the selected parameters into the network and save it
            for i in range(len(self.params.params)):
                self.params.params[i] = curparams[i]
            pickle.dump(self.params, open('infomaxNet.pkl', 'w'))
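
For reuse outside the agent, the pickled network can also be loaded on its own. A small sketch, assuming the softmax output is read off by argmax to name an action (the zero observation vector is only a placeholder; a real one would come from InfoMaxTask.getObservation()):

import pickle
from numpy import argmax

# load the PyBrain network pickled by _saveWeights()
net = pickle.load(open('infomaxNet.pkl'))
net.sorted = False
net.sortModules()

actionNames = ['grasp', 'lift', 'drop', 'shake_roll', 'place', 'push', 'shake_pitch']

observation = [0.0] * net.indim          # placeholder belief/observation vector
action_vector = net.activate(observation)
print "most likely action:", actionNames[argmax(action_vector)]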