Example #1
 def evaluateFrontier(self, parent, utilities, tfam, pca, cmodels):
     import copy
     import random
     import numpy as np
     from SongData import SongData, findevent
     from generate_random_policy import updateNodeAndFeature
     terminus = None
     candidates = []
     prospects = copy.deepcopy(parent.transformedprospects)
     for action in prospects.keys():
         # Expand one child node per available action.
         candidate = copy.deepcopy(parent)
         candidate.generatedbyaction = action
         event = findevent(action, candidate.context.songPitchMap.getPitchMax(SongData.REST))
         updateNodeAndFeature(candidate, event, tfam, pca, cmodels)
         if action == SongData.END:
             terminus = candidate
         elif candidate.transformedfeature not in utilities[candidate.feature.vector[0]]:
             # First visit to this feature cluster: seed its utility at 0.
             utilities[candidate.feature.vector[0]][candidate.transformedfeature] = 0
         candidates.append(candidate)
     
     # Pick the best non-END child; the END candidate is excluded from the max.
     reward = max(utilities[c.feature.vector[0]][c.transformedfeature]
                  for c in candidates if c.generatedbyaction != SongData.END)
     maxlist = [(c, prospects[c.generatedbyaction]) for c in candidates
                if c.generatedbyaction != SongData.END
                and utilities[c.feature.vector[0]][c.transformedfeature] == reward]
     # If there is a tie for best action, draw based on corpus incidence;
     # otherwise the list only has one element anyway.
     drawbest = lambda s: random.choice(sum(([v] * wt for v, wt in s), []))
     argcand = drawbest(maxlist)
     if terminus is not None:
         # Terminate with probability proportional to END's corpus incidence.
         total_prospects = sum(prospects.values())
         if np.random.uniform(0, 1) < float(prospects[SongData.END]) / total_prospects:
             reward, argcand = 0, terminus
     return reward, argcand
 #def actions(self, state):
 #    if state in self.terminals:
 #        return [None]
 #    else:
 #        total = sum([ tfam[state.feature][action] for action in tfam[state.feature].keys() ])
 #        return [ [float(count) / total, nextstate(state, action)] for action in tfam[state.feature].keys() ]
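
The drawbest lambda above implements a weighted random draw by materializing one list entry per unit of weight. A minimal, self-contained sketch of the same idea (the candidate names and counts below are made up for illustration):

import random

# Hypothetical tie list: (candidate, corpus count) pairs.
maxlist = [('candidateA', 3), ('candidateB', 1)]

# Expand each candidate by its weight, then draw uniformly;
# 'candidateA' is drawn with probability 3/4. On Python 3.6+,
# random.choices([v for v, _ in maxlist], weights=[w for _, w in maxlist])
# is an equivalent single call.
pool = sum(([v] * wt for v, wt in maxlist), [])
print(random.choice(pool))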
Example #2
def value_iteration(mdp, tfam, pca, cmodels, epsilon=0.001, MIN_INNER_ITERATIONS=10000):
    """Solving an MDP by value iteration. [Fig. 17.4]"""
    import copy
    import numpy as np
    from generate_random_policy import updateNodeAndFeature, initializetrajectory
    from SongData import SongData
    # clustermodelheuristic is called below; import it from wherever it is
    # defined in this project (its home module is not shown here).
    # NOTE: MIN_INNER_ITERATIONS is accepted but currently unused.
    
    # Utilities are bucketed by parity (feature.vector[0]) and then keyed
    # by feature cluster.
    new_utilities = {0: dict(), 1: dict()}

    R, gamma = mdp.R, mdp.gamma

    s_events = initializetrajectory(mdp.startstates)

    while True:
        print('OUTER LOOP OF VALUE ITERATION--')
        old_utilities = copy.deepcopy(new_utilities)
        start, event, parent = s_events
        updateNodeAndFeature(parent, event, tfam, pca, cmodels)
        delta = 0
        iteration = 0
        # Walk the space, updating each state by its most lucrative neighbor
        # and following the same trajectory.
        while parent.generatedbyaction != SongData.END:
            iteration += 1
            maximum_value_action, child = mdp.evaluateFrontier(parent, old_utilities, tfam, pca, cmodels)
            originalfeature = parent.feature.vector
            parity = originalfeature[0]
            tfeat = tuple(pca.transform(originalfeature).reshape(pca.n_components))
            featurecluster = np.asscalar(clustermodelheuristic(originalfeature, cmodels).predict(tfeat))
            # Bellman update: immediate reward plus discounted best frontier value.
            new_utilities[parity][featurecluster] = R(tfeat) + gamma * maximum_value_action
            if featurecluster not in old_utilities[parity]:
                old_utilities[parity][featurecluster] = 0
            delta = max(delta, abs(new_utilities[parity][featurecluster] - old_utilities[parity][featurecluster]))
            parent = child
        print('iteration #: ' + str(iteration))
        print('\tmax delta: ' + str(delta))
        # Standard termination test [Fig. 17.4]: stop once the largest update
        # is below epsilon * (1 - gamma) / gamma.
        if delta < epsilon * (1 - gamma) / gamma:
            return old_utilities
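
The stopping test delta < epsilon * (1 - gamma) / gamma is the standard value-iteration bound from the AIMA figure cited in the docstring: once the largest per-sweep utility change falls below that threshold, the returned utilities are within epsilon of the true values in max norm. A minimal sketch of the threshold itself, with illustrative numbers:

def vi_threshold(epsilon, gamma):
    """Largest per-sweep utility change that still guarantees max-norm
    error below epsilon (value-iteration termination bound)."""
    return epsilon * (1 - gamma) / gamma

# Illustrative values: with gamma = 0.9 and epsilon = 0.001, iteration
# may stop once delta drops below roughly 0.000111.
print(vi_threshold(0.001, 0.9))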