Beispiel #1
0
def runFactorialThompsonContextualBandit(config):
    '''
    Run the factorial variant of the contextual Thompson sampling bandit.

    One group of regularized logistic regression models is built per factor
    in config["conditions"] (one model per level of that factor), and the
    groups are handed to
    thompson_policy.calculate_thompson_single_bandit_factorial.

    Keys read from config:
      "conditions"    - sequence of sequences; only the length of each inner
                        sequence is used here.
      "rewardFile"    - forwarded unchanged as the first policy argument.
      "outfilePrefix" - "FactorialArms.csv" is appended to it to form the
                        third policy argument (presumably the output file
                        path - confirm against thompson_policy).

    Returns nothing. The w attribute of every model returned by the policy
    is printed, one list per model group.
    '''
    levels_per_factor = [len(levels) for levels in config["conditions"]]
    all_vectors = makeConditionVectors(levels_per_factor)

    # Map each condition vector (made hashable as a tuple) to its position
    # in the list produced by makeConditionVectors.
    vector_to_index = {}
    for index, vector in enumerate(all_vectors):
        vector_to_index[tuple(vector)] = index

    # One independent model per level of each factor.
    # NOTE(review): D=6 presumably matches the length of the context vector
    # built below (intercept included) - confirm against contextualStructure.
    factor_models = []
    for level_count in levels_per_factor:
        factor_models.append([
            logistic_regression.RLogReg(D=6, Lambda=1)
            for _ in range(level_count)
        ])

    def context_with_intercept(row):
        # Always request the intercept term when extracting the context.
        return contextualStructure.get_context(row, includeIntercept=True)

    # The first element of the returned pair (the chosen actions) is not
    # used here.
    _, fitted_models = thompson_policy.calculate_thompson_single_bandit_factorial(
        config["rewardFile"],
        levels_per_factor,
        config["outfilePrefix"] + "FactorialArms.csv",
        factor_models,
        vector_to_index,
        get_context=context_with_intercept)

    for model_group in fitted_models:
        print([fitted.w for fitted in model_group])
Beispiel #2
0
def runUniformRandomBandit(config):
    '''
    Run the single-bandit policy with epsilon set to 1.

    One regularized logistic regression model is created per condition
    vector returned by makeConditionVectors, and the models are passed to
    thompson_policy.calculate_thompson_single_bandit.

    Keys read from config:
      "conditions"    - sequence of sequences; only the length of each inner
                        sequence is used here.
      "rewardFile"    - forwarded unchanged as the first policy argument.
      "outfilePrefix" - "Random.csv" is appended to it to form the third
                        policy argument (presumably the output file path -
                        confirm against thompson_policy).

    Returns nothing; the values returned by the policy are discarded.
    '''
    levels_per_factor = [len(levels) for levels in config["conditions"]]
    num_actions = len(makeConditionVectors(levels_per_factor))

    # One model per combined action.
    action_models = []
    for _ in range(num_actions):
        action_models.append(logistic_regression.RLogReg(D=6, Lambda=1))

    def context_with_intercept(row):
        # Always request the intercept term when extracting the context.
        return contextualStructure.get_context(row, includeIntercept=True)

    # With epsilon equal to 1 an action is always chosen uniformly at random.
    thompson_policy.calculate_thompson_single_bandit(
        config["rewardFile"],
        num_actions,
        config["outfilePrefix"] + "Random.csv",
        action_models,
        epsilon=1,
        get_context=context_with_intercept)
Beispiel #3
0
def runThompsonContextualBandit(config):
    '''
    Run the non-factorial contextual Thompson sampling bandit.

    One regularized logistic regression model is created per condition
    vector returned by makeConditionVectors (i.e. per combined action), and
    the models are passed to thompson_policy.calculate_thompson_single_bandit.

    Keys read from config:
      "conditions"    - sequence of sequences; only the length of each inner
                        sequence is used here.
      "rewardFile"    - forwarded unchanged as the first policy argument.
      "outfilePrefix" - "ExpArms.csv" is appended to it to form the third
                        policy argument (presumably the output file path -
                        confirm against thompson_policy).

    Returns nothing. The w attribute of every model returned by the policy
    is printed as a single list.
    '''
    levels_per_factor = [len(levels) for levels in config["conditions"]]
    num_actions = len(makeConditionVectors(levels_per_factor))

    # One model per combined action.
    action_models = []
    for _ in range(num_actions):
        action_models.append(logistic_regression.RLogReg(D=6, Lambda=1))

    def context_with_intercept(row):
        # Always request the intercept term when extracting the context.
        return contextualStructure.get_context(row, includeIntercept=True)

    # The first element of the returned pair (the chosen actions) is not
    # used here.
    _, fitted_models = thompson_policy.calculate_thompson_single_bandit(
        config["rewardFile"],
        num_actions,
        config["outfilePrefix"] + "ExpArms.csv",
        action_models,
        get_context=context_with_intercept)

    print([fitted.w for fitted in fitted_models])