예제 #1
0
def getPrototypeDistribution(i):
	"""Return the weighted membership curve of prototype ``i`` over all states.

	For every state in ``range(maxState)`` a probe prototype is fixed at
	``state/float(maxState)`` with action 0, and the resulting membership
	grade of prototype ``i`` is multiplied by that prototype's theta.
	The result is a list with one entry per state.
	"""
	target = prototypes[i]

	def contribution(state):
		# Probe located at the normalised state, always with action 0.
		probe = Prototype(numActions, stateDimension)
		probe.setFixed([state/float(maxState)], 0)
		diff = probe.calculateDiff(target)
		# NOTE(review): `/2*width` parses as `(.../2) * width`, i.e. the
		# feature width multiplies the exponent -- confirm this is intended.
		grade = float(exp(-(diff*diff)/2*target.getFeatureWidth()))
		return grade*target.getTheta()

	return [contribution(state) for state in range(maxState)]
예제 #2
0
    def getV(self, state):
        """Return the estimated value of ``state``.

        ``getPrototypeIndex`` is asked for one prototype index per group
        (written into ``pIndex``) and a weight per group (its return value).
        The value is the sum, over the groups, of the selected prototype's
        theta multiplied by that group's weight.
        """
        # Output buffer: getPrototypeIndex receives this list and is expected
        # to fill in the chosen prototype index for each group.
        pIndex = [0] * self.numGroups
        pGrade = self.getPrototypeIndex(state, pIndex)

        thetaSum = 0
        for index, grade in zip(pIndex, pGrade):
            thetaSum += self.prototypes[index].getTheta() * grade

        return thetaSum
예제 #3
0
	def getV(self, state):
		"""Return the estimated value of ``state``.

		``getPrototypeIndex`` is asked for one prototype index per group
		(written into ``pIndex``) and a weight per group (its return value).
		The value is the sum, over the groups, of the selected prototype's
		theta multiplied by that group's weight.
		"""
		# Output buffer: getPrototypeIndex receives this list and is expected
		# to fill in the chosen prototype index for each group.
		pIndex = [0]*self.numGroups
		pGrade = self.getPrototypeIndex(state, pIndex)

		thetaSum = 0
		for index, grade in zip(pIndex, pGrade):
			thetaSum += self.prototypes[index].getTheta()*grade

		return thetaSum
예제 #4
0
def getQ(state, action):
	"""Return the approximate Q-value of ``(state, action)``.

	A query prototype is fixed at ``state/float(maxState)`` with the given
	action; the result is the sum over all prototypes of the prototype's
	theta multiplied by its membership grade for the query.
	"""
	query = Prototype(numActions, stateDimension)
	query.setFixed([state/float(maxState)], action)

	def weightedTheta(proto):
		diff = query.calculateDiff(proto)
		# NOTE(review): `/2*width` parses as `(.../2) * width`, i.e. the
		# feature width multiplies the exponent -- confirm this is intended.
		grade = float(exp(-(diff*diff)/2*proto.getFeatureWidth()))
		return proto.getTheta() * grade

	return sum(weightedTheta(proto) for proto in prototypes)
예제 #5
0
	def getPrototypeIndex(self, state, prototypeIndex):
		"""Pick the best-matching prototype of every group for ``state``.

		``prototypeIndex`` is an output list indexed by group: slot ``g`` is
		overwritten with the index (into ``self.prototypes``) of the prototype
		of group ``g`` with the largest membership grade.  Slots of groups in
		which no prototype reaches a grade above zero are left untouched.

		Returns a list with one entry per group: the best grade of the group
		divided by the sum of all groups' best grades.  Raises
		ZeroDivisionError if there is at least one group and that sum is zero.
		"""
		tempPrototype = Prototype(1, self.stateDimension)
		tempPrototype.setFixed([state/float(self.maxState + 1)], 0)
		pGrade = [0]*self.numGroups

		for i, prototype in enumerate(self.prototypes):
			tempFeatureDiff = tempPrototype.calculateDiff(prototype)

			# Unnormalised grade: no 1/sqrt(2*width*pi) factor is applied.
			# NOTE(review): `/2*width` parses as `(.../2) * width`, i.e. the
			# feature width multiplies the exponent -- confirm this is intended.
			membershipGrade = float(exp(-(tempFeatureDiff*tempFeatureDiff)/2*prototype.getFeatureWidth()))
			group = prototype.group
			if pGrade[group] < membershipGrade:
				prototypeIndex[group] = i
				pGrade[group] = membershipGrade

		# Compute the normaliser once rather than once per element.
		total = sum(pGrade)
		return [grade/total for grade in pGrade]
예제 #6
0
def generatePrototypes():
	"""Append ``numPrototypes`` new prototypes to the global ``prototypes`` list.

	When ``isRandom`` is false, prototype ``k`` is fixed at
	``k/float(numPrototypes+1)`` with action 0.  Otherwise each newly appended
	prototype is re-randomised with ``setRandomly`` until ``isDifferent``
	reports it differs from every prototype at a lower index.
	"""
	if not isRandom:
		for slot in range(numPrototypes):
			fixed = Prototype(numActions, stateDimension)
			fixed.setFixed([slot/float(numPrototypes+1)], 0)
			prototypes.append(fixed)
		return

	# NOTE(review): entries are addressed by the loop counter, so this
	# presumably expects `prototypes` to be empty on entry -- confirm.
	for slot in range(numPrototypes):
		prototypes.append(Prototype(numActions, stateDimension))
		checked = 0
		while checked < slot:
			if not prototypes[slot].isDifferent(prototypes[checked]):
				# Collision: draw again and restart the comparison from 0.
				prototypes[slot].setRandomly()
				checked = 0
				continue
			checked += 1
예제 #7
0
    def getPrototypeIndex(self, state, prototypeIndex):
        """Pick the best-matching prototype of every group for ``state``.

        ``prototypeIndex`` is an output list indexed by group: slot ``g`` is
        overwritten with the index (into ``self.prototypes``) of the prototype
        of group ``g`` with the largest membership grade.  Slots of groups in
        which no prototype reaches a grade above zero are left untouched.

        Returns a list with one entry per group: the best grade of the group
        divided by the sum of all groups' best grades.  Raises
        ZeroDivisionError if there is at least one group and that sum is zero.
        """
        tempPrototype = Prototype(1, self.stateDimension)
        tempPrototype.setFixed([state / float(self.maxState + 1)], 0)
        pGrade = [0] * self.numGroups

        for i, prototype in enumerate(self.prototypes):
            tempFeatureDiff = tempPrototype.calculateDiff(prototype)

            # Unnormalised grade: no 1/sqrt(2*width*pi) factor is applied.
            # NOTE(review): `/ 2 * width` parses as `(... / 2) * width`, i.e.
            # the feature width multiplies the exponent -- confirm intended.
            membershipGrade = float(
                exp(-(tempFeatureDiff * tempFeatureDiff) / 2 *
                    prototype.getFeatureWidth())
            )
            group = prototype.group
            if pGrade[group] < membershipGrade:
                prototypeIndex[group] = i
                pGrade[group] = membershipGrade

        # Compute the normaliser once rather than once per element.
        total = sum(pGrade)
        return [grade / total for grade in pGrade]
예제 #8
0
	def generatePrototypes(self, isRandom):
		"""Append ``self.numPrototypes`` new prototypes to ``self.prototypes``.

		When ``isRandom`` is false, prototype ``k`` is created in group
		``k % self.numGroups`` and fixed at ``k/float(self.maxState+1)`` with
		action 0.  Otherwise each newly appended prototype is re-randomised
		with ``setRandomly`` until ``isDifferent`` reports it differs from
		every prototype at a lower index.
		"""
		if not isRandom:
			for slot in range(self.numPrototypes):
				fixed = Prototype(1, self.stateDimension, slot % self.numGroups)
				fixed.setFixed([slot/float(self.maxState+1)], 0)
				self.prototypes.append(fixed)
			return

		# NOTE(review): entries are addressed by the loop counter, so this
		# presumably expects self.prototypes to be empty on entry -- confirm.
		for slot in range(self.numPrototypes):
			# NOTE(review): no group argument is passed on this path, unlike
			# the fixed layout -- confirm Prototype's default group is wanted.
			self.prototypes.append(Prototype(1, self.stateDimension))
			checked = 0
			while checked < slot:
				if not self.prototypes[slot].isDifferent(self.prototypes[checked]):
					# Collision: draw again and restart the comparison from 0.
					self.prototypes[slot].setRandomly()
					checked = 0
					continue
				checked += 1
예제 #9
0
    def generatePrototypes(self, isRandom):
        """Append ``self.numPrototypes`` new prototypes to ``self.prototypes``.

        When ``isRandom`` is false, prototype ``k`` is created in group
        ``k % self.numGroups`` and fixed at ``k / float(self.maxState + 1)``
        with action 0.  Otherwise each newly appended prototype is
        re-randomised with ``setRandomly`` until ``isDifferent`` reports it
        differs from every prototype at a lower index.
        """
        if not isRandom:
            for slot in range(self.numPrototypes):
                fixed = Prototype(1, self.stateDimension,
                                  slot % self.numGroups)
                fixed.setFixed([slot / float(self.maxState + 1)], 0)
                self.prototypes.append(fixed)
            return

        # NOTE(review): entries are addressed by the loop counter, so this
        # presumably expects self.prototypes to be empty on entry -- confirm.
        for slot in range(self.numPrototypes):
            # NOTE(review): no group argument is passed on this path, unlike
            # the fixed layout -- confirm Prototype's default group is wanted.
            self.prototypes.append(Prototype(1, self.stateDimension))
            checked = 0
            while checked < slot:
                earlier = self.prototypes[checked]
                if not self.prototypes[slot].isDifferent(earlier):
                    # Collision: draw again and restart the comparison from 0.
                    self.prototypes[slot].setRandomly()
                    checked = 0
                    continue
                checked += 1
예제 #10
0
def learn(state1, action1, reward, state2, action2=0):
	"""Apply one SARSA-style temporal-difference update to every prototype.

	state1, action1 -- the state/action pair whose estimate is corrected.
	reward -- reward observed for that pair.
	state2 -- the successor state.
	action2 -- action used to evaluate the successor state; the default of 0
	reproduces the value that used to be hard-coded here.

	The TD error is ``reward + gamma*getQ(state2, action2) - getQ(state1,
	action1)``.  Each prototype's theta is increased by
	``alpha * delta * membershipGrade / numPrototypes``, where the grade is
	the prototype's membership for ``(state1, action1)``.
	"""
	# On-policy target: Q of the successor state for the supplied action.
	# (A Q-learning target would instead take the maximum of
	# getQ(state2, a) over all actions.)
	nextQ = getQ(state2, action2)

	preQ = getQ(state1, action1)

	delta = reward + gamma*nextQ - preQ

	tempPrototype = Prototype(numActions, stateDimension)
	tempPrototype.setFixed([state1/float(maxState)], action1)

	# Loop-invariant part of the step size.
	step = alpha * delta

	for prototype in prototypes:
		tempFeatureDiff = tempPrototype.calculateDiff(prototype)

		# NOTE(review): `/2*width` parses as `(.../2) * width`, i.e. the
		# feature width multiplies the exponent -- confirm this is intended.
		membershipGrade = float(exp(-(tempFeatureDiff*tempFeatureDiff)/2*prototype.getFeatureWidth()))
		prototype.setTheta(prototype.getTheta() + step * membershipGrade/numPrototypes)