Example 1
	def __init__(self, bidding, player, p, learning_p, active_P_count):
		self.player = player
		self.learning_p = learning_p

		# With only two active players, the inactive seats (1 and 3)
		# are forced to pass; everyone else bids normally.
		if active_P_count == 2 and player not in (0, 2):
			self.legal_bids = [possible_bids.index("P")]
		else:
			self.legal_bids = self.getLegalBid(bidding)

		# Per-edge search statistics, one slot per legal bid:
		# W = total value, N = visit count, Q = mean value (W / N),
		# P = prior probability, children = child nodes.
		self.width = len(self.legal_bids)
		self.W = [0] * self.width
		self.N = [0] * self.width
		self.Q = [0] * self.width
		self.P = [0] * self.width
		self.children = [None] * self.width

		# Add exploration noise to the priors of non-learning players
		# whenever there is a real choice to make.
		if self.width > 1 and self.player != learning_p:
			self.P = self.addNoise(p[0])
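The W, N, Q and P lists are the per-edge statistics of a PUCT-style tree search (total value, visit count, mean value and prior, as in AlphaZero). Below is a minimal sketch of how a node could pick its next bid from them; the method name selectBid and the constant c_puct are assumptions, not part of the original class:

	def selectBid(self, c_puct=1.0):
		# Hypothetical PUCT selection: choose the legal bid maximizing
		# Q + U, where U favours edges with a high prior P and a low
		# visit count N.
		total_visits = sum(self.N)
		best_index = 0
		best_score = float("-inf")
		for i in range(self.width):
			u = c_puct * self.P[i] * (total_visits ** 0.5) / (1 + self.N[i])
			if self.Q[i] + u > best_score:
				best_index = i
				best_score = self.Q[i] + u
		return self.legal_bids[best_index]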
Example 2
def getLegalBid(bidding):
    # Enumerate the bids that are legal after the given auction and
    # map each one to its index in the global possible_bids list.
    legal_bids = generateBiddings(bidding, "Competitive")
    return [possible_bids.index(bid) for bid in legal_bids]
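For illustration, assuming possible_bids is the module-level list of all calls (with "P" for pass) and generateBiddings enumerates the legal continuations of an auction, the mapping works as follows; the concrete values below are hypothetical:

# Hypothetical values, for illustration only.
possible_bids = ["P", "1C", "1D", "1H", "1S", "1NT"]
# If generateBiddings(["1C"], "Competitive") returned ["P", "1D", "1H"],
# then getLegalBid(["1C"]) would return their indices: [0, 2, 3].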
Example 3
	def update(self, value, bid):
		# Locate this bid's slot: convert the bid name to its global
		# index in possible_bids, then find that index in legal_bids.
		index = self.legal_bids.index(possible_bids.index(bid))
		# Standard MCTS backup: bump the visit count, accumulate the
		# value, and keep Q as the running mean W / N.
		self.N[index] += 1
		self.W[index] += value
		self.Q[index] = self.W[index] / self.N[index]
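Q is therefore always the running mean of the values backed up through that edge. The same arithmetic, shown standalone with made-up values:

# Standalone demonstration of the backup above, illustrative values only.
W = 0.0
N = 0
Q = 0.0
for value in (1.0, 0.0, 0.5):
    N += 1
    W += value
    Q = W / N
print(N, W, Q)  # -> 3 1.5 0.5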
Example 4
	def one_hot_encode(self, bids):
		# Encode the last three bids as a concatenation of three one-hot
		# blocks of size POSSIBLE_BID_COUNT, left-padded with zero
		# blocks when fewer than three bids have been made.
		res = np.zeros(POSSIBLE_BID_COUNT * 3)
		indices = [possible_bids.index(bid) for bid in bids[-3:]]
		base = 3 - len(indices)
		for i, index in enumerate(indices):
			res[index + POSSIBLE_BID_COUNT * (base + i)] = 1
		return list(res)
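A self-contained version with a toy bid list shows the left padding when fewer than three bids have been made (possible_bids here is a hypothetical stand-in for the module's real list):

import numpy as np

# Toy stand-in for the module's bid list.
possible_bids = ["P", "1C", "1D", "1H"]
POSSIBLE_BID_COUNT = len(possible_bids)

def one_hot_encode(bids):
    res = np.zeros(POSSIBLE_BID_COUNT * 3)
    indices = [possible_bids.index(bid) for bid in bids[-3:]]
    base = 3 - len(indices)
    for i, index in enumerate(indices):
        res[index + POSSIBLE_BID_COUNT * (base + i)] = 1
    return list(res)

# Two bids leave the first block all zero:
# blocks -> [0,0,0,0 | 0,1,0,0 | 1,0,0,0] (as floats)
print(one_hot_encode(["1C", "P"]))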
Example 5
import numpy as np
import tensorflow as tf
from keras import backend as K

# Agent, Agent_Open_Hand, toString, getScore, custom_loss, possible_bids,
# STAT_SIZE, EXPLORE_COEFFICIENT and _DEBUG come from the surrounding module.


def expReplay(network_1, par, hands, resTable, mode=2):
    agents = {}
    counter = 0
    stats = [-1] * (STAT_SIZE * 4)

    # Precompute hand statistics for the two active seats (0 and 2);
    # the slots for the inactive seats stay at -1.
    temp_Agent = Agent("", {})
    temp_Agent.setHand(hands[0])
    for i in range(STAT_SIZE):
        stats[i] = temp_Agent.stat[i]
    temp_Agent.setHand(hands[2])
    for i in range(STAT_SIZE):
        stats[i + STAT_SIZE * 2] = temp_Agent.stat[i]

    # Self-play loop: simulate 10,000 auctions between seats 0 and 2,
    # caching agents by (player, bidding history) so states are reused.
    while counter < 10000:
        ended = False
        bids = []
        player = 0
        related_agents = []
        last_agent = None
        while not ended:
            key = str(player) + "|" + toString(bids)
            if key not in agents:
                agents[key] = Agent_Open_Hand(network_1, {},
                                              EXPLORE_COEFFICIENT)
                agents[key].setState((stats, bids.copy(), player),
                                     hands[player])
            agent = agents[key]
            related_agents.append(agent)
            # The active seat bids and the opposing seats always pass,
            # so only seats 0 and 2 ever choose a call.
            bid = agent.quickMove()
            bids.append(bid)
            bids.append("P")
            player = 2 if player == 0 else 0
            # Three consecutive passes end the auction.
            if bids[-3:] == ["P", "P", "P"]:
                ended = True
            # Link this agent into the tree under its parent node.
            if (last_agent is not None and
                    last_agent.children[possible_bids.index(bid)] is None):
                last_agent.children[possible_bids.index(bid)] = agent
            last_agent = agent

        # Back up the par-relative score through every visited node.
        score = getScore(bids, resTable)
        for agent in reversed(related_agents):
            agent.updateValues(score - par)

        counter += 1

    # Collect (encoded state, visit counts) pairs as training data.
    x = []
    y = []
    for key in agents:
        x.append(agents[key].X)
        y.append(agents[key].visit_count)

    X = np.asarray(x)
    Y_true = np.asarray(y)

    debug = _DEBUG > 2 and K.backend() == "tensorflow"
    if debug:
        session = tf.Session()
        print(session.run(custom_loss(network_1.predict(X), Y_true)))
        print(network_1.predict(X))
        print("------")

    # Fit the policy network to the observed visit counts.
    network_1.fit(X, Y_true, epochs=1, verbose=1)

    if debug:
        print(session.run(custom_loss(network_1.predict(X), Y_true)))
        print(network_1.predict(X))
        print("------End of Exp Replay-----")