def nextstate(state, price):
    """Return the state that follows ``state`` when ``price`` is applied.

    ``state`` is decoded with ``dfs`` (an index into ``disstates``) and
    ``tfs`` (presumably the time slot -- confirm against their definitions).
    The level stored at that index is scaled by ``modelt4.cocoef`` and
    increased by ``modelt4.phi(t, customer, price)``; the sum is then limited
    to ``disstates[-1]``, the last tabulated level.  The result is encoded
    for ``t + 1`` through ``getstate``.
    """
    level_index = dfs(state)
    t = tfs(state)

    carried_over = modelt4.cocoef * disstates[level_index]
    uncapped = carried_over + modelt4.phi(t, customer, price)

    # Never leave the tabulated range: clamp to the last entry of disstates.
    capped = min(uncapped, disstates[-1])
    return getstate(t + 1, capped)
def actionablestatesfrom(state):
    """Return the distinct state indices produced from ``state`` by any allowed action.

    An action ``a`` is considered only when ``a >= modelt4.wholeprice(t)``,
    where ``t = tfs(state)``.  For each such action the resulting level is
    ``modelt4.cocoef * disstates[dfs(state)] + modelt4.phi(t, customer, a)``,
    rounded to the nearest hundred (``np.round(..., -2)``).

    Side effect: the module-level ``maxdisseen`` is raised to the largest
    rounded level seen, measured *before* the levels are clamped to
    ``disstates[-1]``.

    The clamped levels are mapped to indices by ``getdisstate`` and each
    index ``i`` is turned into the state number ``t + i * (ntimeslots + 1)``.
    Duplicates are removed; the order of the returned list is that of set
    iteration and therefore not meaningful.

    Raises:
        ValueError: if no action passes the price filter, because ``max``
            is then applied to an empty sequence.
    """
    global maxdisseen
    t = tfs(state)

    # Collect the unrounded level for every action that passes the filter.
    raw_levels = []
    for a in actions:
        if a >= modelt4.wholeprice(t):
            raw_levels.append(
                modelt4.cocoef * disstates[dfs(state)]
                + modelt4.phi(t, customer, a)
            )
    rounded_levels = np.round(raw_levels, -2)

    # Track the largest level before clamping.
    maxdisseen = max(maxdisseen, max(rounded_levels))

    ceiling = disstates[-1]
    clamped_levels = [min(level, ceiling) for level in rounded_levels]

    reachable = set()
    for level_index in getdisstate(clamped_levels):
        reachable.add(t + level_index * (ntimeslots + 1))
    return list(reachable)
# Roll the policy forward: in every visited state pick the action whose
# entry in that state's row of qmatrix is largest, and record the price,
# the per-step phi value, the tabulated level of the state and the
# (state, price, reward) triple.

# First step, taken from the initial state.
policystate = initstate
action = np.argmax(qmatrix[policystate, :])
price = actions[action]

bestpolicy = [price]
dislist = [initdis]
cumdislist = [dislist[0]]
sar = [(policystate, price, reward(policystate, customer, price))]

# Remaining steps t = 2..24: advance with the previously chosen price, then
# choose again in the state that was reached.
for t in range(2, 25):
    policystate = nextstate(policystate, price)
    action = np.argmax(qmatrix[policystate, :])
    price = actions[action]

    bestpolicy.append(price)
    dislist.append(modelt4.phi(t, customer, price))
    cumdislist.append(disstates[dfs(policystate)])
    sar.append((policystate, price, reward(policystate, customer, price)))

print(bestpolicy)


# VISUALISE INPUT AND OUTPUT DATA
# Combined plot
def plotresults():
    import matplotlib.pyplot as plt
    trange = list(range(1,25))
    barw = 0.25