import math

def exp3(self, numActions, gamma, rewardMin=0, rewardMax=1):
    # self.weights is initialized elsewhere (e.g. [1.0] * numActions).
    t = 0
    while True:
        probabilityDistribution = distr(self.weights, gamma)
        choice = draw(probabilityDistribution)
        self.choiceFreq[choice] += 1

        # Put the choice in the queue and wait for it to be consumed.
        self.choice_queue.put(choice)
        self.choice_queue.join()

        # Get the reward for that choice back from the reward queue.
        theReward = self.reward_queue.get()
        self.reward_queue.task_done()

        # Rewards scaled to [0, 1].
        scaledReward = (theReward - rewardMin) / (rewardMax - rewardMin)

        # Case: frequency-based estimate.
        # probChoice = float(self.choiceFreq[choice] + 1) / (t + 1)
        # estimatedReward = scaledReward / probChoice

        # Case: distribution-based estimate (standard EXP3 importance weighting).
        # estimatedReward = scaledReward / probabilityDistribution[choice]

        # Case: distance-based estimate (rounds since this arm was last chosen).
        estimatedReward = scaledReward * self.distanceLastchoice(t, choice)

        # Important that we use the estimated reward here!
        self.weights[choice] *= math.exp(estimatedReward * gamma / numActions)

        yield choice, theReward, estimatedReward, self.weights
        t = t + 1
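# This variant gets rewards through a pair of queues instead of calling a
# reward function directly. A minimal sketch of the other end of that
# protocol, assuming plain queue.Queue objects and a hypothetical
# environment_reward callback (neither is shown in the original code):

def reward_worker(choice_queue, reward_queue, environment_reward):
    # Consume each arm chosen by exp3, compute its reward in the
    # environment, and hand the reward back through reward_queue.
    while True:
        choice = choice_queue.get()
        reward_queue.put(environment_reward(choice))
        choice_queue.task_done()  # unblocks exp3's choice_queue.join()

# Example wiring:
# import queue, threading
# cq, rq = queue.Queue(), queue.Queue()
# threading.Thread(target=reward_worker,
#                  args=(cq, rq, environment_reward), daemon=True).start()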
def exp3(numActions, reward, gamma, rewardMin=0, rewardMax=1):
    weights = [1.0] * numActions
    t = 0
    while True:
        probabilityDistribution = distr(weights, gamma)
        choice = draw(probabilityDistribution)
        theReward = reward(choice, t)

        # Rewards scaled to [0, 1].
        scaledReward = (theReward - rewardMin) / (rewardMax - rewardMin)

        # Importance-weighted estimate; important that we use the
        # estimated reward here!
        estimatedReward = scaledReward / probabilityDistribution[choice]
        weights[choice] *= math.exp(estimatedReward * gamma / numActions)

        yield choice, theReward, estimatedReward, weights
        t = t + 1
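# Both versions rely on distr and draw, which are not defined in this
# section. A minimal sketch of what they presumably do, following the
# standard EXP3 formulation: distr mixes the normalized weights with the
# uniform distribution using the exploration rate gamma, and draw samples
# an index from the resulting distribution.

import random

def distr(weights, gamma=0.0):
    # p_i = (1 - gamma) * w_i / sum(w) + gamma / K
    theSum = float(sum(weights))
    return [(1.0 - gamma) * (w / theSum) + gamma / len(weights)
            for w in weights]

def draw(probabilityDistribution):
    # Sample an index with probability proportional to its mass.
    choiceValue = random.uniform(0, sum(probabilityDistribution))
    cumulative = 0.0
    for index, p in enumerate(probabilityDistribution):
        cumulative += p
        if choiceValue <= cumulative:
            return index
    return len(probabilityDistribution) - 1  # guard against float rounding

# Example: drive the generator version for 1000 rounds with a toy reward.
# gen = exp3(numActions=10, reward=lambda choice, t: random.random(),
#            gamma=0.07)
# for _ in range(1000):
#     choice, theReward, estimatedReward, weights = next(gen)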
def bidding(self):
    # Draw a bid index from the distribution self.pi and map it onto
    # the discretized bid grid with step size self.eps.
    bid = draw(self.pi)
    return bid * self.eps
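# A guess at the supporting setup, assuming self.pi is a probability
# vector over a grid of bids {0, eps, 2*eps, ..., maxBid}; the helper
# name and the maxBid parameter are hypothetical, not from the original.

def make_uniform_bid_distribution(maxBid, eps):
    # Uniform starting distribution over the discretized bid grid,
    # matching bidding()'s convention of returning index * eps.
    numBids = int(maxBid / eps) + 1
    return [1.0 / numBids] * numBids

# self.pi = make_uniform_bid_distribution(maxBid=1.0, eps=0.1)
# self.eps = 0.1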