Example #1
    def makeCPTpure(self, setCPT=True):
        """Convert mixed CPT to pure CPT,

        :arg setCPT: if True (default), then the CPT attribute is converted to
           a pure CPT. Otherwise, the output is a pure CPT.
        :type setCPT: bool

        .. note::

           whenever there are multiple argmaxes, each gets equal probability in
           the resulting "pure" CPT.

        """
        if setCPT:
            self.CPT = convert_2_pureCPT(self.CPT)
        else:
            return convert_2_pureCPT(copy.copy(self.CPT))
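
The conversion itself is easy to sketch: for each configuration of the parents, all probability mass moves to the action(s) with the largest CPT entry, split equally among ties. A minimal standalone illustration of that idea with NumPy (the helper name make_pure and the sample array are made up for this sketch; this is not pynfg's convert_2_pureCPT):

import numpy as np

def make_pure(cpt):
    """Zero out non-argmax entries along the last axis and split the mass
    equally among ties, mirroring the note in the docstring above."""
    cpt = np.asarray(cpt, dtype=float)
    maxes = cpt.max(axis=-1, keepdims=True)         # best value per parent config
    mask = (cpt == maxes).astype(float)             # 1 at each argmax, 0 elsewhere
    return mask / mask.sum(axis=-1, keepdims=True)  # equal probability per argmax

# mixed CPT over 2 parent values x 3 actions, with a tie in the second row
mixed = np.array([[0.2, 0.5, 0.3],
                  [0.4, 0.4, 0.2]])
print(make_pure(mixed))
# [[0.  1.  0. ]
#  [0.5 0.5 0. ]]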
Example #2
    def train_node(self, nodename, level, logit=False, setCPT=False, verbose=False):
        """Compute level-k best response at the DN given Game

        :arg nodename: the name of the decision node where MCEUs are estimated
        :type nodename: str
        :arg level: The level at which to train that player
        :type level: int
        :arg setCPT: If the trained CPT should be set as the current CPT.
            Otherwise, it can be accessed through node.LevelCPT.  Default is
            False
        :type setCPT: bool

        Notes
        -----

        When training a player at level k, the other players' CPTs are accessed
        through self.Game.node_dict[other_player].LevelCPT[k-1].

        """
        print('Training ' + nodename + ' at level ' + str(level))
        Game = copy.deepcopy(self.Game)  # copy in order to maintain original CPT
        ps = self.specs
        for node in Game.node_dict.values():  # Game changes, self.Game doesn't
            if type(node) is pynfg.DecisionNode:
                try:
                    node.CPT = node.LevelCPT[level - 1]
                except KeyError:
                    raise KeyError('Need to train other players at level %s'
                                   % str(level-1))
        EUtable = mceu(Game, nodename, Game.node_dict[nodename].N,
                       Game.node_dict[nodename].tol, Game.node_dict[nodename].delta,
                       verbose=verbose)
        if not logit:
            self.Game.node_dict[nodename].LevelCPT[level] = \
                  convert_2_pureCPT(EUtable)
            if setCPT:
                self.Game.node_dict[nodename].CPT = convert_2_pureCPT(EUtable)
        else:
            weight = np.exp(Game.node_dict[nodename].beta*EUtable)
            norm = np.sum(weight, axis=-1)
            self.Game.node_dict[nodename].LevelCPT[level] = \
                weight / norm[..., np.newaxis]
            if setCPT:
                self.Game.node_dict[nodename].CPT = weight/norm[..., np.newaxis]
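
The logit branch above is a softmax (quantal-response) weighting of the estimated expected utilities: larger beta concentrates probability on the best action, while beta near zero approaches uniform play. A standalone sketch of that weighting (the function name logit_cpt, the utility values, and the beta values are illustrative, not taken from pynfg):

import numpy as np

def logit_cpt(eu_table, beta):
    """Softmax the last axis of an expected-utility table into a CPT."""
    weight = np.exp(beta * eu_table)                 # unnormalized logit weights
    return weight / weight.sum(axis=-1, keepdims=True)

eu = np.array([[1.0, 2.0, 0.5],                      # EU of each action, per parent value
               [0.0, 0.0, 3.0]])
print(logit_cpt(eu, beta=2.0))   # sharply favors the argmax in each row
print(logit_cpt(eu, beta=0.1))   # nearly uniform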
Example #3
def bestresponse_node(Game, dn, N, delta=1, tol=30, verbose=False):
    """Compute level-k best response at the DN given Game

    :arg Game: the Network Form Game of interest
    :type Game: SemiNFG or iterSemiNFG
    :arg dn: the name of the decision node where MCEUs are estimated
    :type dn: str
    :arg N: the max number of iterations for the estimation
    :type N: int
    :arg tol: the minimum number of samples per parent value
    :type tol: int

    """
    G = copy.deepcopy(Game)
    EUtable = mceu(G, dn, N, tol, delta, verbose)
    G.node_dict[dn].CPT = convert_2_pureCPT(EUtable)
    return G
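
The heavy lifting in both snippets is done by mceu, which estimates the Monte Carlo expected utility of each action at the decision node by repeatedly sampling the net and averaging the player's utility. A much-simplified sketch of that estimation pattern on a made-up one-shot game (this is not pynfg's mceu; the payoff function and sample counts are invented for illustration):

import numpy as np

rng = np.random.default_rng(0)

def toy_utility(action, opponent_action):
    """Matching-pennies-style payoff used only for this illustration."""
    return 1.0 if action == opponent_action else -1.0

def monte_carlo_eu(n_actions, n_samples):
    """Estimate each action's expected utility by sampling the opponent."""
    eu = np.zeros(n_actions)
    for a in range(n_actions):
        draws = rng.integers(n_actions, size=n_samples)   # sampled opponent play
        eu[a] = np.mean([toy_utility(a, o) for o in draws])
    return eu

eu_table = monte_carlo_eu(n_actions=2, n_samples=1000)
print(eu_table)   # roughly [0, 0] against a uniform opponent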
Example #4
    def train_node(self, bn, level, setCPT=False):
        """Solve for the optimal policy using Optimistic Q-learning. Optimistic
        Q-Learning  is an off-policy TD control RL algorithm

        :arg bn: The basename of the node with the CPT to be trained
        :type bn: str
        :arg level: The level at which to train the basename
        :type level: int
        :arg setCPT: if True, the trained CPT is also set as the current CPT
            for every time step of the basename. Default is False
        :type setCPT: bool
        """

        print('Training ' + bn + ' at level ' + str(level))
        Game = copy.deepcopy(self.Game)
        ps = self.specs
        player = Game.bn_part[bn][0].player
        w, d, N, r_max = ps[player]['w'], ps[player]['delta'], ps[player][bn]['N'], \
            ps[player][bn]['r_max']
        #Set other CPTs to level-1.  Works even if CPTs aren't pointers.
        for o_player in Game.players:
            bn_list = list(set(map(lambda x: x.basename, Game.partition[o_player])))
            for base in bn_list:
                if base != bn:
                    for dn in Game.bn_part[base]:
                        try:
                            dn.CPT = \
                                (self.trained_CPTs[o_player][base][level - 1])
                        except KeyError:
                            raise KeyError('Need to train other players at level %s'
                                   % str(level-1))
        T0 = Game.starttime #get the start time
        T = Game.endtime + 1 #get the end time
        shape = Game.bn_part[bn][T0].CPT.shape #the shape of CPT
        if d<1:
            Q0 = r_max*((1-d**(T-T0))/(1-d)) #the initial q value
        else:
            Q0 = r_max*(T-T0)
        Q = Q0 * np.ones(shape) #the initial q table
        visit = np.zeros(shape)
        #the number of times each (m,a) pair has been visited.
        r_av = 0 #the dynamic (discounted) average reward
        rseries = [] #a series of average rewards
        for ep in range(N):
            print(ep)
            #convert Q table to CPT
            Game.bn_part[bn][T0].CPT = convert_2_pureCPT(Q)
            Game.sample_timesteps(T0,T0) #sample the start time step
            malist = Game.bn_part[bn][T0].dict2list_vals(
                valueinput=Game.bn_part[bn][T0].value)
            #get the list of (m,a) pair from the iterated semi-NFG
            mapair = Game.bn_part[bn][T0].get_CPTindex(malist) #get CPT index
            r = Game.reward(player,T0) #get the (discounted) reward
            if ep != 0: #to avoid "divided by 0" error
                r_av_new = r_av + (r-r_av)/((T-1)*ep) #update the dynamic reward
            Qmax = Q[mapair] #get the maximum q value
            for t in range(T0 + 1, T):
                Game.bn_part[bn][t].CPT = convert_2_pureCPT(Q) #convert Q table to CPT
                Game.sample_timesteps(t,t) #sample the current time step
                if t!= (T-1): #required by Q-learning
                    r = d**t*Game.reward(player,t) # get the (discounted) reward
                    r_av_new = r_av + (r-r_av)/((T-1)*ep+t) #update the reward
                malist_new = Game.bn_part[bn][t].dict2list_vals(
                    valueinput=Game.bn_part[bn][t].value)
                mapair_new = Game.bn_part[bn][t].get_CPTindex(malist_new)
                visit[mapair] = visit[mapair] + 1 #update the number of times
                alpha = (1/(1+visit[mapair]))**w #the learning rate
                Qmax_new = Q[mapair_new] #new maximum q value
                Q[mapair] = Qmax + alpha*(r + d*Qmax_new -Qmax) #update q table
                mapair = mapair_new
                Qmax = Qmax_new
                r_av = r_av_new
            rseries.append(r_av)
        self.trained_CPTs[player][bn][level] = Game.bn_part[bn][0].CPT
        plt.figure()
        plt.plot(rseries, label = str(bn + ' Level ' + str(level)))
        #plotting rseries to gauge convergence
        plt.legend()
        fig = plt.gcf()
        self.figs[bn][str(level)] = fig
        if setCPT:
            for t in np.arange(T0, T):
                _setallCPTs(self.Game, bn, t, Game.bn_part[bn][0].CPT)
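
Stripped of the game machinery, the loop above is the standard Q-learning update with an optimistic initial value Q0 = r_max * (1 - d**(T - T0)) / (1 - d) and a decaying learning rate alpha = (1 / (1 + visits))**w. A self-contained toy version of that update on a two-state chain (the dynamics, rewards, and parameter values are invented for illustration; this is not the pynfg training loop):

import numpy as np

n_states, n_actions = 2, 2
d, w, r_max, horizon = 0.9, 0.8, 1.0, 50            # discount, lr exponent, reward bound
Q0 = r_max * (1 - d**horizon) / (1 - d)             # optimistic initial value
Q = Q0 * np.ones((n_states, n_actions))
visits = np.zeros((n_states, n_actions))
rng = np.random.default_rng(0)

def step(state, action):
    """Toy dynamics: the chosen action usually becomes the next state,
    and being in state 1 pays reward 1."""
    s_next = action if rng.random() < 0.9 else 1 - action
    return s_next, float(s_next)

s = 0
for _ in range(5000):
    a = int(np.argmax(Q[s]))                        # greedy w.r.t. the optimistic Q
    s_next, r = step(s, a)
    visits[s, a] += 1
    alpha = (1.0 / (1.0 + visits[s, a]))**w         # decaying learning rate
    Q[s, a] += alpha * (r + d * np.max(Q[s_next]) - Q[s, a])
    s = s_next

print(np.argmax(Q, axis=1))   # greedy policy per state; should favor action 1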