def RGPTSds(T, Arms, d, K, Z, repetitions=False):
    """Run the d-step lookahead Thompson sampling loop for T rounds.

    The first K rounds play ``Arms[0] .. Arms[K-1]`` once each.  After that
    a sequence of arms is chosen every ``d`` rounds and played out one arm
    per round.

    Args:
        T: number of rounds; also the length of the returned arrays.
        Arms: indexable collection of arm objects.  Each arm is read for
            ``z`` and ``erew`` and must provide ``UpdatePosterior``.
        d: length of the lookahead sequence.
        K: number of initial rounds (presumably the number of arms --
            confirm against callers).
        Z: passed through unchanged to the ``g`` helpers and the sequence
            selection functions.
        repetitions: when true use ``GetTSSeqRep`` / ``g.GetBestSeqRep``,
            otherwise ``GetTSSeq`` / ``g.GetBestSeq``.

    Returns:
        Tuple ``(reg, rew)`` of length-T numpy arrays holding the per-round
        regret and the per-round value returned by ``g.PlayArm``.
    """
    reg = np.zeros(T)
    rew = np.zeros(T)

    # Initial rounds.  Bounded by T so that a horizon shorter than K does
    # not index past the end of reg/rew (UCB guards the same case).
    for t in range(min(K, T)):
        myarm = Arms[t]
        ynew = g.PlayArm(myarm, myarm.z)
        myarm.UpdatePosterior(myarm.z, ynew)
        g.UpdateErews(Arms)
        reg[t] = max(arm.erew for arm in Arms) - myarm.erew
        rew[t] = ynew
        g.UpdateZs(Arms, myarm, Z)

    # Main phase.  Both variants (with / without repetitions) share the same
    # loop; they only differ in which sequence helpers are called.
    for t in range(K, T):
        seqpoint = (t - K) % d  # position inside the current d-step sequence
        if seqpoint == 0:
            # Start of a new sequence: sample, then pick the sequence to
            # play and the reference sequence used for the regret.
            zcurr = [arm.z for arm in Arms]
            SampleFuncts(Arms, d, zcurr, Z)
            g.UpdateErews(Arms)
            if repetitions:
                myseq = GetTSSeqRep(Arms, d, t, K, Z)
                bestseq = g.GetBestSeqRep(Arms, d, Z)
            else:
                myseq = GetTSSeq(Arms, d, t, K, Z)
                bestseq = g.GetBestSeq(Arms, d, Z)
            reg2 = g.ErewSeq(bestseq, Z) - g.ErewSeq(myseq, Z)

        myarm = myseq[seqpoint]
        ynew = g.PlayArm(myarm, myarm.z)
        rew[t] = ynew
        # Spread the regret of the d-step sequence evenly over its d rounds.
        reg[t] = reg2 / d
        # The sequence is already fixed, so updating the posterior now does
        # not influence the remaining choices within this sequence.
        myarm.UpdatePosterior(myarm.z, ynew)
        # Update every arm's z ready for the next round.
        g.UpdateZs(Arms, myarm, Z)

    return reg, rew
def RogueUCBTuned(T, Arms, K, Z, eta, sigma2):
    """Run the RogueUCB-Tuned loop for T rounds.

    The first K rounds play ``Arms[0] .. Arms[K-1]`` once each.  Every later
    round recomputes ``arm.ucb`` for all arms via ``CalcUCB`` and plays the
    arm with the largest value.

    Args:
        T: number of rounds; also the length of the returned arrays.
        Arms: indexable collection of arm objects (read for ``z`` and
            ``erew``; ``ucb`` is written here).
        K: number of initial rounds (presumably the number of arms --
            confirm against callers).
        Z: passed through unchanged to ``g.UpdateZs``.
        eta: forwarded to ``CalcUCB``.
        sigma2: forwarded to ``CalcUCB``.

    Returns:
        Tuple ``(reg, rew)`` of length-T numpy arrays holding the per-round
        regret and the per-round value returned by ``g.PlayArm``.
    """
    reg = np.zeros(T)
    rew = np.zeros(T)

    # Initial rounds.  Bounded by T so that a horizon shorter than K does
    # not index past the end of reg/rew (UCB guards the same case).
    for t in range(min(K, T)):
        myarm = Arms[t]
        ynew = g.PlayArm(myarm, myarm.z)
        g.UpdateHist(myarm, myarm.z, ynew)
        UpdateTheta(myarm)
        g.UpdateErews(Arms)
        reg[t] = max(arm.erew for arm in Arms) - myarm.erew
        g.UpdateZs(Arms, myarm, Z)
        rew[t] = ynew

    # Main phase.
    for t in range(K, T):
        # Refresh the indices and the expected rewards before choosing.
        for arm in Arms:
            arm.ucb = CalcUCB(arm, t, eta, sigma2)
        g.UpdateErews(Arms)

        # Regret is recorded before the play, from the refreshed erew values.
        myarm = max(Arms, key=attrgetter('ucb'))
        reg[t] = max(arm.erew for arm in Arms) - myarm.erew

        ynew = g.PlayArm(myarm, myarm.z)
        g.UpdateHist(myarm, myarm.z, ynew)
        rew[t] = ynew
        UpdateTheta(myarm)

        # Update every arm's z ready for the next round.
        g.UpdateZs(Arms, myarm, Z)

    return reg, rew
def RGPTS(T, Arms, K, Z):
    """Run the single-step Thompson sampling loop for T rounds.

    The first K rounds play ``Arms[0] .. Arms[K-1]`` once each.  Every later
    round calls ``GetTS`` on each arm and plays the arm with the largest
    ``ts`` attribute (presumably set by ``GetTS`` -- confirm).

    Args:
        T: number of rounds; also the length of the returned arrays.
        Arms: indexable collection of arm objects.  Each arm is read for
            ``z``, ``erew`` and ``ts`` and must provide ``UpdatePosterior``.
        K: number of initial rounds (presumably the number of arms --
            confirm against callers).
        Z: passed through unchanged to ``GetTS`` and ``g.UpdateZs``.

    Returns:
        Tuple ``(reg, rew)`` of length-T numpy arrays holding the per-round
        regret and the per-round value returned by ``g.PlayArm``.
    """
    reg = np.zeros(T)
    rew = np.zeros(T)

    # Initial rounds.  Bounded by T so that a horizon shorter than K does
    # not index past the end of reg/rew (UCB guards the same case).
    for t in range(min(K, T)):
        myarm = Arms[t]
        ynew = g.PlayArm(myarm, myarm.z)
        rew[t] = ynew
        myarm.UpdatePosterior(myarm.z, ynew)
        g.UpdateErews(Arms)
        reg[t] = max(arm.erew for arm in Arms) - myarm.erew
        g.UpdateZs(Arms, myarm, Z)

    # Main phase.
    for t in range(K, T):
        # Refresh the Thompson samples and the expected rewards.
        for arm in Arms:
            GetTS(arm, arm.z, t, K, Z)
        g.UpdateErews(Arms)

        # Regret is recorded before the play, from the refreshed erew values.
        myarm = max(Arms, key=attrgetter('ts'))
        reg[t] = max(arm.erew for arm in Arms) - myarm.erew

        ynew = g.PlayArm(myarm, myarm.z)
        rew[t] = ynew
        myarm.UpdatePosterior(myarm.z, ynew)

        # Update every arm's z ready for the next round.
        g.UpdateZs(Arms, myarm, Z)

    return reg, rew
def _ucb_play_and_record(arm, Arms, Z, reg, rew, t):
    """Play ``arm`` in warm-up round ``t`` and record counts, regret and reward."""
    ynew = g.PlayArm(arm, arm.z)
    zkey = int(arm.z)
    arm.numplaysz[zkey] += 1
    # Accumulate the observed reward (not a play count) so that totrewz
    # really is the total reward collected at this z.
    arm.totrewz[zkey] += ynew
    g.UpdateErews(Arms)
    reg[t] = max(a.erew for a in Arms) - arm.erew
    g.UpdateZs(Arms, arm, Z)
    rew[t] = ynew


def UCB(T, Arms, K, Z, sigma2):
    """Run a UCB loop that keeps separate statistics per arm and per z value.

    Each arm gets three dictionaries keyed by ``0 .. int(Z)``: ``ucbz``,
    ``numplaysz`` (play counts) and ``totrewz`` (summed rewards).  A warm-up
    phase tries to play every arm at every z in ``0 .. int(Z) - 1``, filling
    the waiting rounds with randomly chosen other arms.  The remaining rounds
    call ``CalcUCBz`` per arm and play the arm with the largest
    ``ucbz[int(arm.z)]``.

    Args:
        T: number of rounds; also the length of the returned arrays.
        Arms: indexable collection of arm objects (read for ``z`` and
            ``erew``; ``ucbz``, ``numplaysz``, ``totrewz`` and ``ucb`` are
            written here).
        K: number of arms iterated over in the warm-up; also forwarded to
            ``CalcUCBz``.
        Z: bound on z; forwarded to ``g.UpdateZs`` and ``CalcUCBz``.
        sigma2: not used by this function; kept for interface compatibility.

    Returns:
        Tuple ``(reg, rew)`` of length-T numpy arrays holding the per-round
        regret and the per-round value returned by ``g.PlayArm``.
    """
    reg = np.zeros(T)
    rew = np.zeros(T)

    # Per-arm, per-z statistics.
    for arm in Arms:
        arm.ucbz = {}
        arm.numplaysz = {}
        arm.totrewz = {}
        for z in range(int(Z) + 1):
            arm.ucbz[z] = 0.
            arm.numplaysz[z] = 0.
            arm.totrewz[z] = 0.

    # Warm-up: play each arm at each z.  Every play is guarded by t < T so
    # the horizon is never exceeded.
    t = 0
    for j in range(0, K):
        if t >= T:
            break
        myarm = Arms[j]
        _ucb_play_and_record(myarm, Arms, Z, reg, rew, t)
        t += 1
        if t >= T:
            break

        otherarms = [arm for arm in Arms if arm != myarm]
        # NOTE(review): the statistics are initialised for z in 0..int(Z)
        # but this loop stops at int(Z) - 1 -- confirm that is intended.
        for z in range(0, int(Z)):
            if myarm.numplaysz[z] != 0:
                continue
            # Play random other arms until our arm's z reaches the target.
            while t < T and int(myarm.z) != z:
                mytemparm = random.choice(otherarms)
                _ucb_play_and_record(mytemparm, Arms, Z, reg, rew, t)
                t += 1
            if t >= T:
                break
            _ucb_play_and_record(myarm, Arms, Z, reg, rew, t)
            t += 1
            if t >= T:
                break

    # Main phase.
    while t < T:
        # Refresh the indices and the expected rewards before choosing.
        for arm in Arms:
            CalcUCBz(arm, arm.z, t, K, Z)
            arm.ucb = arm.ucbz[int(arm.z)]
        g.UpdateErews(Arms)

        # Regret is recorded before the play, from the refreshed erew values.
        myarm = max(Arms, key=attrgetter('ucb'))
        reg[t] = max(arm.erew for arm in Arms) - myarm.erew

        ynew = g.PlayArm(myarm, myarm.z)
        zkey = int(myarm.z)
        myarm.numplaysz[zkey] += 1
        # Total reward must grow by the observed reward, not by one.
        myarm.totrewz[zkey] += ynew
        rew[t] = ynew

        # Update every arm's z ready for the next round.
        g.UpdateZs(Arms, myarm, Z)
        t += 1

    return reg, rew