Example #1
    def move(self, board):
        # Convert to numpy array
        start_state = self.make_np_array(board)

        # get ally's predicted move
        # ally_predicted_action = self.model_cnn.predict(start_state)
        # print("ally", ally_predicted_action)
        # identify which 5 output nodes to take max from
        # ally_alist = ally_predicted_action[0].tolist()
        # ally_action = ally_alist.index(max(ally_alist))
        # start_index = ally_action * self.num_actions

        # TODO Remove later, add cnn back
        # get ally's next move
        tmp_board = play_round(board, 4)
        ally_action = tmp_board.players[0].action
        start_index = ally_action * self.num_actions

        # Make a new prediction now that the weights have been updated
        all_q_vals = self.model_dqn.predict([start_state, self.mask1])
        if ally_action == self.num_actions - 1:
            q_vals = all_q_vals[0][start_index:]
        else:
            q_vals = all_q_vals[0][start_index:start_index + self.num_actions]
        qlist = q_vals.tolist()
        action = qlist.index(max(qlist))
        self.action = action
        return get_desired_space_from_action(self.position, action)
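
The mask fed to model_dqn.predict above (self.mask1) is not defined in this snippet. A minimal standalone sketch of what it is assumed to be: an all-ones array over the joint (ally action, own action) outputs of the network, so every Q-value is returned and the block for the ally's action can be sliced out afterwards. The shape and the 5-way action space are assumptions inferred from this snippet, not the project's actual code.

import numpy as np

num_actions = 5  # assumed action count, matching the one-hot lists used in these agents
# All-ones mask over the num_actions * num_actions joint outputs, so predict()
# returns every Q-value and the slice starting at ally_action * num_actions
# can be taken afterwards, as in the method above.
mask1 = np.ones((1, num_actions * num_actions), dtype=np.uint8)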
Example #2
def play(labels):
    extended_labels = labels + list(range(max(labels) + 1, million + 1))
    current, index = labels[0], build_index(extended_labels)

    for i in range(10 * million):
        current, index = play_round(current, index)

    return index[1] * index[index[1]]
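
The helpers used above (million, build_index, play_round) are defined elsewhere in that project. A minimal sketch, assuming a successor-array representation of a circular cup game in which index[label] holds the label of the cup that follows it clockwise; the names and rules below are assumptions, not the project's actual code.

million = 1_000_000


def build_index(labels):
    # index[label] -> label of the next cup clockwise
    index = [0] * (len(labels) + 1)
    for a, b in zip(labels, labels[1:]):
        index[a] = b
    index[labels[-1]] = labels[0]  # close the circle
    return index


def play_round(current, index):
    # Pick up the three cups that follow the current cup.
    a = index[current]
    b = index[a]
    c = index[b]
    # Destination: count down from the current label, wrapping to the
    # highest label and skipping the three picked-up cups.
    dest = current - 1 or len(index) - 1
    while dest in (a, b, c):
        dest = dest - 1 or len(index) - 1
    # Splice the picked-up cups back in after the destination cup.
    index[current] = index[c]
    index[c] = index[dest]
    index[dest] = a
    # The cup after the current one becomes the new current cup.
    return index[current], index

Under that assumption, the return value index[1] * index[index[1]] is the product of the two labels that immediately follow cup 1.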
Example #3
File: sandbox.py  Project: jwh9456/daifugo
def test_play(student):
    hands = game.deal()
    players = (student, student, student, student)
    discard = []
    lp = 0
    game_over = False

    retval = []
    round = 0
    t_pass = True

    while not game_over:
        round += 1
        retval.append('Round {0}'.format(round))
        retval.append([
            'HANDS',
            [
                'player{0}: {1}'.format(i, sorted(h))
                for i, h in enumerate(hands)
            ]
        ])
        prev_hands = copy.deepcopy(hands)
        prev_lp = lp
        try:
            hands, lp, game_over, discard = game.play_round(
                hands, players, discard, lp, 'raise')
        except game.InvalidAction as e:
            retval.append(['INVALID ACTION', [e, e.call]])
            t_pass = False
            break
        retval.append([
            'PLAYS',
            [
                'player{0}: {1}'.format((prev_lp + i) % len(players), str(p))
                for i, p in enumerate(discard[-1])
            ]
        ])

        if prev_hands == hands:
            # Nobody played, so we break out or we would be stuck
            game_over = True
            retval.append("GAME OVER: Nobody played")

        else:
            retval.append(['WINNER', ['player{0}'.format(lp)]])
Example #4
File: sandbox.py  Project: saffsd/daifugo
def test_play(student):
    hands = game.deal()
    players = (student, student, student, student)
    discard = []
    lp = 0
    game_over = False

    retval = []
    round = 0
    t_pass = True

    while not game_over:
        round += 1
        retval.append("Round {0}".format(round))
        retval.append(["HANDS", ["player{0}: {1}".format(i, sorted(h)) for i, h in enumerate(hands)]])
        prev_hands = copy.deepcopy(hands)
        prev_lp = lp
        try:
            hands, lp, game_over, discard = game.play_round(hands, players, discard, lp, "raise")
        except game.InvalidAction as e:
            retval.append(["INVALID ACTION", [e, e.call]])
            t_pass = False
            break
        retval.append(
            [
                "PLAYS",
                ["player{0}: {1}".format((prev_lp + i) % len(players), str(p)) for i, p in enumerate(discard[-1])],
            ]
        )

        if prev_hands == hands:
            # Nobody played, so we break out or we would be stuck
            game_over = True
            retval.append("GAME OVER: Nobody played")

        else:
            retval.append(["WINNER", ["player{0}".format(lp)]])
Example #5
from game import labels, example_labels, play_round, hash_labels

for i in range(100):
    labels = play_round(labels)

print(hash_labels(labels))
Example #6
    def move(self, board):

        # Convert to numpy array
        start_state = self.make_np_array(board)

        # epsilon goes from 1 down to 0.1
        if self.epsilon > 0.1:
            self.epsilon -= 0.000001

        # get ally's predicted move
        # ally_predicted_action = self.model_cnn.predict(start_state)
        # print("ally", ally_predicted_action)
        # identify which 5 output nodes to take max from
        # ally_alist = ally_predicted_action[0].tolist()
        # ally_action = ally_alist.index(max(ally_alist))
        # start_index = ally_action * self.num_actions

        # TODO Remove later, add cnn back
        # get ally's next move
        tmp_board = play_round(board, 4)
        ally_action = tmp_board.players[0].action
        tmp_list = [0, 0, 0, 0, 0]
        tmp_list[ally_action] = 1
        ally_np = np.array(tmp_list, dtype=np.uint8)
        ally_np = np.reshape(ally_np, (1, 5))

        # get Q values
        all_q_vals = self.model_dqn.predict([start_state, ally_np, self.mask5])

        # Choose your action: random action if < epsilon or best action if > epsilon
        rand_val = random.random()
        if rand_val < self.epsilon:
            action = random.randint(0, 4)
        else:
            qlist = all_q_vals[0].tolist()
            action = qlist.index(max(qlist))

        # Play one game iteration with the chosen action
        new_board = play_round(board, action)
        new_state = self.make_np_array(new_board)

        #######################
        #  reward calculation
        #######################
        reward = 0
        # first add actual points (use team scores rather than just the player's score)
        # team 1 score minus team 2 score
        # reward += (new_board.score[0] - board.score[0] + new_board.score[1] - board.score[1] -
        #           (new_board.score[2] - board.score[2] + new_board.score[3] - board.score[3]))
        reward += (new_board.score[1] - board.score[1]) * 10

        # get everyone else's targets
        # find your target based on closest food in direction of action taken (don't consider foods 'behind' you)
        # consider that you could have just eaten your target and it moved
        # if you and ally share same target, lose points, unless any enemy is closer to the same target than your ally
        # very small reward for moving toward center of map
        # other rewards

        # convert action into np
        tmp_list = [0, 0, 0, 0, 0]
        tmp_list[action] = 1
        action_np = np.array(tmp_list, dtype=np.uint8)
        action_np = np.reshape(action_np, (1, 5))
        self.memory.append(
            [start_state, ally_np, action_np, reward, new_state])

        # Fit (when there are enough samples)
        if len(self.memory) > self.min_memory_size:
            batch = random.sample(self.memory, self.batch_size)
            start_states = np.array([np.squeeze(x[0]) for x in batch],
                                    dtype=np.uint8)  # remove first dimension
            ally_actions = np.array([np.squeeze(x[1]) for x in batch],
                                    dtype=np.uint8)  # remove first dimension
            actions = np.array([np.squeeze(x[2]) for x in batch],
                               dtype=np.uint8)  # remove first dimension
            rewards = np.array([x[3] for x in batch])
            next_states = np.array([np.squeeze(x[4]) for x in batch],
                                   dtype=np.uint8)  # remove first dimension
            self.fit_batch(start_states, ally_actions, actions, rewards,
                           next_states)
            self.iteration += 1

        # Make a new prediction now that the weights have been updated
        all_q_vals = self.model_dqn.predict([start_state, ally_np, self.mask5])
        qlist = all_q_vals[0].tolist()
        if self.iteration % 10000 == 1:
            print(qlist)
        action = qlist.index(max(qlist))
        return get_desired_space_from_action(self.position, action)
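
The training step called in the middle of this method, self.fit_batch(...), is not shown in the example. Below is a minimal sketch of a plausible fit step, assuming a Keras-style three-input model, a discount factor gamma, and the common trick of using the taken action's one-hot vector as the output mask so only that Q-value contributes to the loss; terminal states are ignored for brevity, since the replay memory above stores no done flag, and every name here is an assumption rather than the project's actual implementation.

import numpy as np


def fit_batch(model_dqn, gamma, start_states, ally_actions, actions, rewards, next_states):
    # Q-values for the next states, with an all-ones mask so every output is
    # returned. Reusing the stored ally action for the next state is a
    # simplification made for this sketch.
    ones_mask = np.ones_like(actions)
    next_q = model_dqn.predict([next_states, ally_actions, ones_mask])
    # One-step TD target: reward plus the discounted best next Q-value.
    targets = rewards + gamma * np.max(next_q, axis=1)
    # The one-hot action doubles as the output mask, so only the taken
    # action's output is pushed toward the target.
    target_vectors = actions * targets[:, None]
    model_dqn.fit([start_states, ally_actions, actions], target_vectors,
                  epochs=1, verbose=0)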
Example #7
def main():
    """Controlling the game flow"""

    echo.clear()
    print(constants.WELCOME_MESSAGE)
    
    # getting towns
    towns = init.get_towns()
    num_towns = len(towns)
    rng_towns = range(num_towns)
    
    # getting hometown and number of managers in hometown
    managers = [0 for town in rng_towns]
    (hometown, num_managers) = init.get_managers(towns)
    
    managers[towns.index(hometown)] = num_managers
    
    # getting the number of days to be played
    period = init.get_timeframe()
    
    # getting possible wins for each city
    potentials = init.get_potentials(towns)
    
    # getting the network (adjacency)
    network = init.get_network(towns)

    hotels = [0 for town in rng_towns]
    
    score_of_today = [0 for town in rng_towns]
    score = 0
    
    # mapping cities to integers (inverse of towns)
    cities = dict((town, towns.index(town)) for town in towns)
    
    # mapping integers to towns (inverse of cities)
    towns = dict((towns.index(town), town) for town in towns)
    
    # state represents the most important data structure of the whole program
    # it contains the essential attributes of the game
    state = dict((town,[towns[town], managers[town], hotels[town], \
                potentials[town], score_of_today[town], network[town]]) \
                for town in rng_towns)
    
    
    day = 1
    
    # main loop over the number of days to be played        
    while day < period + 1:
        echo.clear()    
        echo.headline(day)
        
        days_left = period - day
        [state, day_shift, city] = game.play_round(state, period, days_left, \
                                                    towns, cities)

        # special case: hire -> day_shift > 1
        if day_shift > 1:
            # new manager is active with the beginning of day d + 2
            # where d is the day he was hired (in the morning)
            for shift in range(day_shift):
                # state enters as state at the end of day + shift
                profit_today = sum([state[town][4] for town in rng_towns])
                if shift > 0:
                    print("Automatische Berechnung ...\n")
                    time.sleep(.5)
                print("\nStatus am Ende von Tag " + str(day + shift))
                echo.status(state)
                print("Gewinn an Tag " + str(day + shift) + ": " + \
                        str(profit_today))
                
                # adding the profit of the current day to the overall score
                score += profit_today
                print("Gesamtgewinn mit Ende von Tag " + str(day + shift) + \
                        ": " + str(score) + "\n")
                
                # resetting the score of the day
                for town in rng_towns:
                    state[town][4] = 0
                
                # we want the manager to be effective with beginning of
                # day + (shift = 2)
                if shift == 1:
                    state[city][1] += 1
                
                # turning state into evening mode
                state = game.calculate_profit(state)
                shift += 1
                
                # special case: resetting state to morning mode at the end
                # of the final iteration
                if shift == day_shift:
                    for town in rng_towns:
                        state[town][4] = 0
                
                input()
                echo.clear()

        # every other case
        else:
            profit_today = sum([state[town][4] for town in rng_towns])

            print("\nStatus am Ende von Tag " + str(day) + \
              " (" + str(period) + "):")
            echo.status(state)

            print("Gewinn an Tag " + str(day) + ": " + str(profit_today))
            score += profit_today

            print("Gesamtgewinn mit Ende von Tag " + str(day) + ": " + \
                  str(score) + "\n")
            
            # setting the score of the day to 0 for beginning of next day
            for town in rng_towns:
                state[town][4] = 0
            
            input()
            echo.clear()
        
        day += day_shift
      
    print("Spielende!\n")
    print("Gesamtgewinn im Spiel: " + str(score) + "\n")    
    
    name = get_name()
    save_score(score, name)

    print(constants.GOODBYE_MESSAGE)