def RGPTSds(T, Arms, d, K, Z, repetitions=False):
    """Play T rounds, choosing arms in length-d sequences planned every d rounds.

    The first K rounds play ``Arms[0] .. Arms[K-1]`` once each. Afterwards,
    every d rounds the arms are resampled with ``SampleFuncts``, a sequence
    is picked with ``GetTSSeq`` (or ``GetTSSeqRep``) and then played one arm
    per round.

    Args:
        T: number of rounds; also the length of the returned arrays.
        Arms: indexable collection of arm objects. Each arm is read for
            ``z`` and ``erew`` and must provide ``UpdatePosterior(z, y)``.
        d: length of each planned sequence (lookahead depth).
        K: number of warm-up rounds (presumably the number of arms; verify
            against the caller).
        Z: passed through to the helpers unchanged.
            NOTE(review): looks like the upper bound on ``z`` - confirm.
        repetitions: when true, use the ``*Rep`` helpers, i.e. sequences in
            which repetitions are allowed.

    Returns:
        Tuple ``(reg, rew)`` of numpy arrays of length T holding the
        per-round regret and the per-round observed reward.
    """
    # initialize parameters
    reg = np.zeros(T)
    rew = np.zeros(T)

    # run initial rounds: one play per arm, in order. Capped at T so that a
    # horizon shorter than K does not index past the end of reg/rew.
    for t in range(min(K, T)):
        myarm = Arms[t]
        ynew = g.PlayArm(myarm, myarm.z)
        myarm.UpdatePosterior(myarm.z, ynew)
        g.UpdateErews(Arms)
        reg[t] = max(arm.erew for arm in Arms) - myarm.erew
        rew[t] = ynew
        g.UpdateZs(Arms, myarm, Z)

    # run full algorithm; the two variants differ only in which helpers
    # build the played sequence and the best sequence
    for t in range(K, T):
        seqpoint = (t - K) % d  # find out where in the sequence we are
        if seqpoint == 0:
            # start of a new sequence: resample and re-plan
            zcurr = [arm.z for arm in Arms]
            SampleFuncts(Arms, d, zcurr, Z)
            g.UpdateErews(Arms)
            if repetitions:
                myseq = GetTSSeqRep(Arms, d, t, K, Z)
                bestseq = g.GetBestSeqRep(Arms, d, Z)
            else:
                myseq = GetTSSeq(Arms, d, t, K, Z)
                bestseq = g.GetBestSeq(Arms, d, Z)
            reg2 = g.ErewSeq(bestseq, Z) - g.ErewSeq(myseq, Z)

        myarm = myseq[seqpoint]
        ynew = g.PlayArm(myarm, myarm.z)
        rew[t] = ynew
        # average out regret from the d-step lookahead over the d steps
        reg[t] = reg2 / d
        # update posterior - the sequence is already chosen, so doing this
        # mid-sequence does not affect the current plan
        myarm.UpdatePosterior(myarm.z, ynew)
        # update all z's ready for next round
        g.UpdateZs(Arms, myarm, Z)

    return reg, rew
def RogueUCBTuned(T, Arms, K, Z, eta, sigma2):
    """Play T rounds, each time picking the arm with the largest ``ucb``.

    Rounds ``0 .. K-1`` play ``Arms[0] .. Arms[K-1]`` once each. From round
    K on, every arm's ``ucb`` attribute is refreshed with ``CalcUCB`` and the
    arm with the highest value is played.

    Args:
        T: number of rounds; also the length of the returned arrays.
        Arms: indexable collection of arm objects (read for ``z`` and
            ``erew``; ``ucb`` is written here).
        K: number of warm-up rounds.
        Z: forwarded unchanged to ``g.UpdateZs``.
        eta: forwarded unchanged to ``CalcUCB``.
        sigma2: forwarded unchanged to ``CalcUCB``.

    Returns:
        Tuple ``(reg, rew)`` of numpy arrays of length T with the per-round
        regret and the per-round observed reward.
    """
    rew = np.zeros(T)
    reg = np.zeros(T)

    # Warm-up phase: each of the first K arms is played exactly once.
    for step in range(K):
        chosen = Arms[step]
        observed = g.PlayArm(chosen, chosen.z)
        g.UpdateHist(chosen, chosen.z, observed)
        UpdateTheta(chosen)
        g.UpdateErews(Arms)
        top_erew = max(candidate.erew for candidate in Arms)
        reg[step] = top_erew - chosen.erew
        g.UpdateZs(Arms, chosen, Z)
        rew[step] = observed

    # Main phase: index-based selection.
    for step in range(K, T):
        # Refresh every arm's index and the expected rewards.
        for candidate in Arms:
            candidate.ucb = CalcUCB(candidate, step, eta, sigma2)
        g.UpdateErews(Arms)

        # Pick the arm with the largest index; regret is recorded before
        # the arm is actually played.
        chosen = max(Arms, key=lambda candidate: candidate.ucb)
        top_erew = max(candidate.erew for candidate in Arms)
        reg[step] = top_erew - chosen.erew

        observed = g.PlayArm(chosen, chosen.z)
        g.UpdateHist(chosen, chosen.z, observed)
        rew[step] = observed
        UpdateTheta(chosen)

        # Advance all z's so they are ready for the next round.
        g.UpdateZs(Arms, chosen, Z)

    return reg, rew
def RGPTS(T, Arms, K, Z):
    """Play T rounds, each time picking the arm with the largest ``ts``.

    Rounds ``0 .. K-1`` play ``Arms[0] .. Arms[K-1]`` once each. From round
    K on, ``GetTS`` is called for every arm and the arm with the highest
    ``ts`` attribute is played.

    Args:
        T: number of rounds; also the length of the returned arrays.
        Arms: indexable collection of arm objects. Each arm is read for
            ``z``, ``erew`` and ``ts`` and must provide
            ``UpdatePosterior(z, y)``.
        K: number of warm-up rounds (presumably the number of arms; verify
            against the caller).
        Z: forwarded unchanged to ``GetTS`` and ``g.UpdateZs``.

    Returns:
        Tuple ``(reg, rew)`` of numpy arrays of length T with the per-round
        regret and the per-round observed reward.
    """
    # initialize parameters
    reg = np.zeros(T)
    rew = np.zeros(T)

    # run initial rounds: one play per arm, in order. Capped at T so that a
    # horizon shorter than K does not index past the end of reg/rew.
    for t in range(min(K, T)):
        myarm = Arms[t]
        ynew = g.PlayArm(myarm, myarm.z)
        myarm.UpdatePosterior(myarm.z, ynew)
        g.UpdateErews(Arms)
        reg[t] = max(arm.erew for arm in Arms) - myarm.erew
        g.UpdateZs(Arms, myarm, Z)
        rew[t] = ynew

    # run full algorithm
    for t in range(K, T):
        # refresh the per-arm samples and expected rewards; GetTS's return
        # value is not used, so it presumably stores its result on arm.ts
        for arm in Arms:
            GetTS(arm, arm.z, t, K, Z)
        g.UpdateErews(Arms)

        # select arm and play it; regret is recorded before the play
        myarm = max(Arms, key=attrgetter('ts'))
        reg[t] = max(arm.erew for arm in Arms) - myarm.erew
        ynew = g.PlayArm(myarm, myarm.z)
        rew[t] = ynew
        myarm.UpdatePosterior(myarm.z, ynew)

        # update all z's ready for next round
        g.UpdateZs(Arms, myarm, Z)

    return reg, rew
def _ucb_record_play(arm, ynew):
    """Add one play with reward ``ynew`` to ``arm``'s tables at its current z."""
    zkey = int(arm.z)
    arm.numplaysz[zkey] += 1
    # Accumulate the observed reward (not a count) so that totrewz and
    # numplaysz together describe an average reward per z.
    # NOTE(review): CalcUCBz is not visible here - confirm it reads totrewz
    # as a reward sum.
    arm.totrewz[zkey] += ynew


def _ucb_warmup_play(arm, Arms, Z, reg, rew, t):
    """Play ``arm`` in warm-up round ``t``, log regret/reward, return ``t + 1``."""
    ynew = g.PlayArm(arm, arm.z)
    _ucb_record_play(arm, ynew)
    g.UpdateErews(Arms)
    reg[t] = max(other.erew for other in Arms) - arm.erew
    g.UpdateZs(Arms, arm, Z)
    rew[t] = ynew
    return t + 1


def UCB(T, Arms, K, Z, sigma2):
    """Play T rounds using per-(arm, z) statistics and the ``ucb`` index.

    Every arm gets three dictionaries keyed by ``0 .. int(Z)``: ``ucbz``,
    ``numplaysz`` and ``totrewz``. A warm-up phase tries to play each of the
    first K arms at every z in ``0 .. int(Z) - 1``, playing randomly chosen
    other arms while waiting for the target arm to reach the wanted z. The
    remaining rounds call ``CalcUCBz`` for every arm and play the arm whose
    ``ucbz`` entry at its current z is largest.

    Args:
        T: number of rounds; also the length of the returned arrays.
        Arms: indexable collection of arm objects (read for ``z`` and
            ``erew``; ``ucbz``, ``numplaysz``, ``totrewz`` and ``ucb`` are
            written here).
        K: number of arms to warm up (``Arms[0] .. Arms[K-1]``).
        Z: largest z key created in the per-arm tables; also forwarded to
            ``CalcUCBz`` and ``g.UpdateZs``.
        sigma2: accepted for interface compatibility; not used in this
            function.

    Returns:
        Tuple ``(reg, rew)`` of numpy arrays of length T with the per-round
        regret and the per-round observed reward.
    """
    # initialize parameters
    reg = np.zeros(T)
    rew = np.zeros(T)
    num_z = int(Z)

    # define ucb, numplays, totrew as dictionaries per arm, keyed by z
    for arm in Arms:
        arm.ucbz = {z: 0. for z in range(num_z + 1)}
        arm.numplaysz = {z: 0. for z in range(num_z + 1)}
        arm.totrewz = {z: 0. for z in range(num_z + 1)}

    # run initial rounds playing each arm at each z; every play consumes one
    # round, so the horizon is re-checked after each of them
    t = 0
    for j in range(K):
        if t >= T:
            break
        myarm = Arms[j]
        t = _ucb_warmup_play(myarm, Arms, Z, reg, rew, t)
        if t >= T:
            break
        otherarms = [arm for arm in Arms if arm != myarm]
        for z in range(num_z):
            if myarm.numplaysz[z] != 0:
                continue
            # play other arms at random while waiting for our arm to reach z
            while t < T and int(myarm.z) != z:
                t = _ucb_warmup_play(random.choice(otherarms), Arms, Z,
                                     reg, rew, t)
            if t >= T:
                break
            t = _ucb_warmup_play(myarm, Arms, Z, reg, rew, t)
            if t >= T:
                break

    # run full algorithm
    while t < T:
        # first calculate up to date ucbs and rewards
        for arm in Arms:
            CalcUCBz(arm, arm.z, t, K, Z)
            arm.ucb = arm.ucbz[int(arm.z)]
        g.UpdateErews(Arms)

        # select arm and play it; regret is recorded before the play
        myarm = max(Arms, key=attrgetter('ucb'))
        reg[t] = max(arm.erew for arm in Arms) - myarm.erew
        ynew = g.PlayArm(myarm, myarm.z)
        _ucb_record_play(myarm, ynew)
        rew[t] = ynew

        # update all z's ready for next round
        g.UpdateZs(Arms, myarm, Z)

        t += 1

    return reg, rew