def outputConfidenceKnownR(alg, nextStateMul, nObs):
    '''
    Output the confidence set for a given algorithm, when the rewards are
    known and only the transition from the initial state is unknown.

    Args:
        alg - string for which algorithm to use
        nextStateMul - how many multiples of good/bad states there are
        nObs - how many observations, split between the next states

    Returns:
        qMax - the 0.05 upper quantile
    '''
    nState = 1 + 2 * nextStateMul
    nextObs = nObs / float(nState - 1)  # observations per successor state

    # Make the environment
    env = environment.make_confidenceMDP(nextStateMul)

    # Make the feature extractor
    f_ext = FeatureTrueState(env.epLen, env.nState, env.nAction, env.nState)

    # Make the agent
    agent_constructor = alg_dict[alg]
    agent = agent_constructor(env.nState, env.nAction, env.epLen)

    # Tell the agent the known parts of the model via (effectively)
    # infinite-precision priors
    agent.R_prior[0, 0] = (0, 1e9)
    for s in range(1, nState):
        # Pin the next-state distribution from s (huge pseudocount on s)
        agent.P_prior[s, 0][s] += 1e9
        # Pin the reward in s: odd states pay 1, even states pay 0
        agent.R_prior[s, 0] = (s % 2, 1e9)

    for ep in xrange(nObs):
        # Reset the environment
        env.reset()
        agent.update_policy(ep)
        pContinue = 1
        while pContinue > 0:
            # Step through the episode
            h, oldState = f_ext.get_feat(env)
            action = 0
            reward, newState, pContinue = env.advance(action)
            agent.update_obs(oldState, action, reward, newState, pContinue, h)

    agent.update_policy()
    return agent.qVals[0, 0][0]
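

# Illustrative usage (not part of the original script); 'PSRL' is assumed to
# be one of the keys of alg_dict defined further below:
#   qKnownR = outputConfidenceKnownR('PSRL', nextStateMul=2, nObs=1000)

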
def outputConfidenceH(alg, epLen, nObs):
    '''
    Output the confidence set for a given algorithm, when epLen changes.

    Args:
        alg - string for which algorithm to use
        epLen - length of each episode (horizon)
        nObs - how many observations to collect

    Returns:
        qMax - the 0.05 upper quantile
    '''
    # Make the environment
    env = environment.make_HconfidenceMDP(epLen)

    # Make the feature extractor
    f_ext = FeatureTrueState(env.epLen, env.nState, env.nAction, env.nState)

    # Make the agent
    agent_constructor = alg_dict[alg]
    agent = agent_constructor(env.nState, env.nAction, env.epLen)

    for ep in xrange(nObs):
        # Reset the environment
        env.reset()
        agent.update_policy(ep)
        pContinue = 1
        while pContinue > 0:
            # Step through the episode
            h, oldState = f_ext.get_feat(env)
            action = 0
            reward, newState, pContinue = env.advance(action)
            agent.update_obs(oldState, action, reward, newState, pContinue, h)

    agent.update_policy()
    return agent.qVals[0, 0][0]
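

# A minimal sketch (not part of the original script) of how the two helpers
# above might be swept over problem sizes; the grids and the algorithm name
# 'PSRL' below are illustrative assumptions.
def sweepConfidenceBounds(alg='PSRL', stateMuls=(1, 2, 4), epLens=(2, 4, 8),
                          nObs=1000):
    '''Collect confidence bounds as the state space and the horizon grow.'''
    boundsByMul = {mul: outputConfidenceKnownR(alg, mul, nObs)
                   for mul in stateMuls}
    boundsByH = {epLen: outputConfidenceH(alg, epLen, nObs)
                 for epLen in epLens}
    return boundsByMul, boundsByH
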
                '%03.2f' % args.scaling + '_seed=' + str(args.seed) + '.csv')

    folderName = './'
    targetPath = folderName + fileName
    print('******************************************************************')
    print(fileName)
    print('******************************************************************')

    # Make the environment
    env = environment.make_hardBanditMDP(epLen=args.epLen,
                                         gap=args.gap,
                                         nAction=2,
                                         pSuccess=0.5)

    # Make the feature extractor
    f_ext = FeatureTrueState(env.epLen, env.nState, env.nAction, env.nState)

    # Make the agent
    alg_dict = {
        'PSRL': finite_tabular_agents.PSRL,
        'PSRLunif': finite_tabular_agents.PSRLunif,
        'OptimisticPSRL': finite_tabular_agents.OptimisticPSRL,
        'GaussianPSRL': finite_tabular_agents.GaussianPSRL,
        'UCBVI': finite_tabular_agents.UCBVI,
        'BEB': finite_tabular_agents.BEB,
        'BOLT': finite_tabular_agents.BOLT,
        'UCRL2': finite_tabular_agents.UCRL2,
        'UCRL2_GP': finite_tabular_agents.UCRL2_GP,
        'UCRL2_GP_RTDP': finite_tabular_agents.UCRL2_GP_RTDP,
        'EULER': finite_tabular_agents.EULER,
        'EULER_GP': finite_tabular_agents.EULER_GP,