예제 #1
0
def nextstate(state, price):
    """Return the state that follows ``state`` when ``price`` is applied.

    The successor is built by ``getstate`` from the time component of
    ``state`` advanced by one and an updated "dis" value.  The updated value
    is ``modelt4.cocoef * disstates[dfs(state)]`` plus
    ``modelt4.phi(t, customer, price)``, limited from above by the last
    entry of ``disstates``.

    Args:
        state: State identifier understood by ``dfs`` and ``tfs``.
        price: Price passed through to ``modelt4.phi``.

    Returns:
        Whatever ``getstate(t + 1, value)`` returns for the successor.
    """
    level_index = dfs(state)
    slot = tfs(state)

    # Part of the current level that carries over, plus this step's increment.
    carried_over = modelt4.cocoef * disstates[level_index]
    uncapped = carried_over + modelt4.phi(slot, customer, price)

    # The last entry of disstates acts as the upper bound for the new level.
    ceiling = disstates[-1]
    capped = min(uncapped, ceiling)

    return getstate(slot + 1, capped)
예제 #2
0
def actionablestatesfrom(state):
    """Return the state indices reachable from ``state`` via allowed actions.

    Only actions ``a`` with ``a >= modelt4.wholeprice(t)`` are considered,
    where ``t = tfs(state)``.  For each of them the candidate value
    ``modelt4.cocoef * disstates[dfs(state)] + modelt4.phi(t, customer, a)``
    is computed and rounded to the nearest hundred (``np.round(..., -2)``).
    The rounded values are capped at ``disstates[-1]``, mapped to indices by
    ``getdisstate`` and combined with ``t`` as ``t + i * (ntimeslots + 1)``.

    Side effects:
        Updates the module-level ``maxdisseen`` with the largest rounded
        (pre-cap) value encountered.

    Args:
        state: State identifier understood by ``dfs`` and ``tfs``.

    Returns:
        A list of unique state indices in no particular order.  The list is
        empty (and ``maxdisseen`` is left untouched) when no action passes
        the ``wholeprice`` filter.
    """
    global maxdisseen
    t = tfs(state)

    # These terms do not depend on the action, so compute them once instead
    # of once per candidate action.
    carried_over = modelt4.cocoef * disstates[dfs(state)]
    min_price = modelt4.wholeprice(t)
    ceiling = disstates[-1]

    candidates = [
        carried_over + modelt4.phi(t, customer, a)
        for a in actions
        if a >= min_price
    ]
    if not candidates:
        # No action passes the filter; max() on an empty sequence would raise.
        return []

    nextdisvalues = np.round(candidates, -2)
    # Track the maximum before capping so values above the grid are visible.
    maxdisseen = max(maxdisseen, max(nextdisvalues))

    capped = [min(value, ceiling) for value in nextdisvalues]
    idx = getdisstate(capped)

    # NOTE(review): the index is built from t, whereas nextstate() advances
    # to t + 1 -- confirm against getstate() that this is intended.
    return list({t + i * (ntimeslots + 1) for i in idx})
예제 #3
0
# Roll the learned policy forward from initstate.  At every step the action
# with the largest qmatrix entry for the current state is chosen, and the
# resulting price, per-step phi value, state level and
# (state, price, reward) triple are recorded.

# First step: start from the initial state and its initial level.
policystate = initstate
action = np.argmax(qmatrix[policystate, :])
price = actions[action]

bestpolicy = [price]
dislist = [initdis]
cumdislist = [initdis]
sar = [(policystate, price, reward(policystate, customer, price))]

# Remaining steps, t = 2 .. 24: move to the successor state, then pick the
# best action there.
for t in range(2, 25):
    policystate = nextstate(policystate, price)
    action = np.argmax(qmatrix[policystate, :])
    price = actions[action]

    bestpolicy.append(price)
    dislist.append(modelt4.phi(t, customer, price))
    cumdislist.append(disstates[dfs(policystate)])
    sar.append((policystate, price, reward(policystate, customer, price)))

print(bestpolicy)




# VISUALISE INPUT AND OUTPUT DATA

# Combined plot
def plotresults():
    import matplotlib.pyplot as plt
    trange = list(range(1,25))
    barw = 0.25