def evaluteFrontier(self,parent,utilities,tfam,pca,cmodels):
    """Expand one step of the search frontier from `parent` and pick the best child.

    For every action in the parent's transformed prospects, build a deep-copied
    child node, advance it with updateNodeAndFeature, and look up its utility in
    `utilities` (a two-level dict keyed by feature parity, then transformed
    feature; missing entries are initialized to 0 in place, mutating the
    caller's dict). Returns (reward, argcand): the max utility over non-END
    children and one child achieving it, with ties broken by a weighted random
    draw proportional to each action's corpus incidence in `prospects`. If an
    END action was available, with probability prospects[END]/total the END
    child is returned instead with reward 0.

    NOTE(review): indentation reconstructed from a flattened source line — the
    if/elif nesting below is the most natural reading; confirm against history.
    """
    from SongData import SongData, findevent
    from generate_random_policy import updateNodeAndFeature
    terminus = None
    candidates = []
    # Deep copy so the weighted draw below can read prospects without racing
    # any mutation of parent.transformedprospects elsewhere.
    prospects = copy.deepcopy(parent.transformedprospects)
    for action in prospects.keys():
        # Each candidate child starts as a full copy of the parent node.
        candidate = copy.deepcopy(parent)
        candidate.generatedbyaction = action
        event = findevent(action, candidate.context.songPitchMap.getPitchMax(SongData.REST))
        updateNodeAndFeature(candidate,event,tfam,pca,cmodels)
        if action == SongData.END:
            # Remember the terminal child separately; it may still be drawn below.
            terminus = candidate
        elif candidate.transformedfeature not in utilities[candidate.feature.vector[0]]:
            # Lazily initialize unseen (parity, transformed-feature) utilities to 0.
            # This mutates the caller-supplied `utilities` dict in place.
            utilities[candidate.feature.vector[0]][candidate.transformedfeature] = 0
        candidates.append(candidate)
    # Best attainable utility among non-terminal children.
    # NOTE(review): the filter compares c.feature (not c.generatedbyaction) to
    # SongData.END — presumably updateNodeAndFeature sets feature to END for
    # the END action; if not, the END child's utility lookup would KeyError.
    # TODO confirm.
    reward = max([utilities[c.feature.vector[0]][c.transformedfeature] for c in candidates if c.feature != SongData.END])
    # All children tied at the best utility, paired with their corpus weight.
    maxlist = [(c,prospects[c.generatedbyaction]) for c in candidates if c.feature!=SongData.END and utilities[c.feature.vector[0]][c.transformedfeature]==reward]
    #if tie for best action, draw based on corpus incidence, otherwise list will only have one element anywayz
    # Replicates each candidate `wt` times and draws uniformly => weighted choice.
    drawbest= lambda s : random.choice(sum(([v]*wt for v,wt in s),[]))
    argcand = drawbest(maxlist)
    if terminus is not None:
        # With probability prospects[END]/total, terminate instead: reward 0,
        # return the END child rather than the best non-terminal one.
        total_prospects = sum([prospects[pro] for pro in prospects])
        if np.random.uniform(0,1) < float(prospects[SongData.END])/total_prospects:
            reward, argcand = 0, terminus
    return reward, argcand
    #def actions(self, feature):
    #    if state in self.terminals:
    #        return [None]
    #    else:
    #        total = sum([ tfam[state.feature][action] for action in tfam[state.feature].keys() ])
    #        return [ [float(count) / total, nextstate(state, action)] for action in tfam[state.feature].keys() ]
def value_iteration(mdp, tfam,pca,cmodels,epsilon = 0.001, MIN_INNER_ITERATIONS = 10000):
    """Solving an MDP by value iteration. [Fig. 17.4]

    Trajectory-based variant: instead of sweeping all states, each outer
    iteration walks one trajectory from the MDP's start state until an END
    action, updating the utility of each visited state from its most lucrative
    frontier child (via mdp.evaluteFrontier). Utilities are a two-level dict
    keyed by feature parity (0/1), then cluster id. Converges when the largest
    per-state change `delta` drops below epsilon*(1-gamma)/gamma (the standard
    Bellman-error stopping bound), and returns the previous iteration's
    utilities.

    NOTE(review): MIN_INNER_ITERATIONS is accepted but never used in this body.
    NOTE(review): indentation reconstructed from a flattened source line —
    loop nesting below is the most natural reading; confirm against history.
    NOTE(review): np.asscalar is deprecated/removed in modern NumPy (use
    .item()); code is Python 2 (print statements), so leaving as-is.
    """
    from generate_random_policy import updateNodeAndFeature, initializetrajectory
    from SongData import SongData
    # Utilities split by feature parity: new_utilities[parity][cluster] -> value.
    new_utilities = dict()
    new_utilities[0]= dict()
    new_utilities[1] = dict()
    R, gamma = mdp.R, mdp.gamma
    s_events = initializetrajectory(mdp.startstates)
    while True:
        print 'OUTER LOOP OF VALUE ITERATION--'
        # Snapshot last iteration's values; updates below read old, write new.
        old_utilities = copy.deepcopy(new_utilities)
        start, event, parent = s_events
        updateNodeAndFeature(parent,event,tfam,pca,cmodels)
        delta = 0
        iteration = 0
        #walk the space, updating each state by its most lucrative neighbor and following same trajectory
        while parent.generatedbyaction != SongData.END:
            iteration+=1
            # Best achievable one-step value and the child realizing it.
            maximum_value_action, child = mdp.evaluteFrontier(parent,old_utilities,tfam,pca,cmodels)
            originalfeature=parent.feature.vector
            # First vector component doubles as the parity key into utilities.
            parity = originalfeature[0]
            # PCA-transformed feature, flattened to a hashable tuple.
            tfeat = tuple(pca.transform(originalfeature).reshape(pca.n_components))
            # Map the feature to its cluster id under the heuristically chosen model.
            featurecluster = np.asscalar(clustermodelheuristic(originalfeature,cmodels).predict(tfeat))
            # Bellman update: immediate reward plus discounted best successor value.
            new_utilities[parity][featurecluster] = R(tfeat) + gamma * maximum_value_action
            if featurecluster not in old_utilities[parity]:
                # First visit to this state: treat its old value as 0 for delta.
                old_utilities[parity][featurecluster] = 0
            delta = max(delta, abs(new_utilities[parity][featurecluster] - old_utilities[parity][featurecluster] ))
            parent = child
            print('iteration #:' + str(iteration))
        print '\tmax delta: ' + str(delta)
        # Standard value-iteration stopping criterion (AIMA Fig. 17.4).
        if delta < epsilon * (1 - gamma) / gamma:
            return old_utilities