Example #1
import copy

import torch


def Play_Random_Test(model, env):
    env.reset()
    model.double()

    model.load_state_dict(torch.load('model.pth'))

    # TD agent plays 1; the fixed baseline opponent plays -1

    TD = TDAgent(model)

    action_dict = dict()
    reward, done = 0, False  # defaults in case no move is ever applied

    player = -1
    rolls = env.roll_dice()
    fake_board = copy.deepcopy(env.board)
    moves = env.all_possible_moves(player, fake_board, rolls)

    for i, move in enumerate(moves):
        action_dict[i] = move

    # Baseline opponent: always take the first legal move sequence
    action = action_dict[0]

    for a in action:
        reward, done = env.step(a, env.board, player)

    while True:

        player = env.change_player(player)
        rolls = env.roll_dice()

        fake_board = copy.deepcopy(env.board)
        actions = env.all_possible_moves(player, fake_board, rolls)

        if actions is not None:
            fake = copy.deepcopy(env)
            best_action, win_prob = TD.select_best_action(
                actions, fake, player)

            if len(best_action) != 0:
                for a in best_action:
                    reward, done = env.step(a, env.board, player)

        if done:
            winner = reward
            break

        player = env.change_player(player)
        rolls = env.roll_dice()

        action_dict = dict()
        fake_board = copy.deepcopy(env.board)
        moves = env.all_possible_moves(player, fake_board, rolls)

        if len(moves) != 0:
            for i, move in enumerate(moves):
                action_dict[i] = move

            # print(action_dict)

            # For interactive play, a human could choose instead:
            # select = int(input('Select Action: '))
            # action = action_dict[select]

            # Baseline opponent: always take the first legal move sequence
            action = action_dict[0]

            for a in action:
                reward, done = env.step(a, env.board, player)

        if done:
            winner = reward
            break

    return winner
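
A quick driver for the test above. This is a minimal sketch: Backgammon is a hypothetical name for the environment class, Network for the model class, and the reward convention (1 when the TD agent wins) is an assumption, since none of them are defined in this example.

model = Network()    # hypothetical model class from the project
env = Backgammon()   # hypothetical environment class from the project

wins = 0
games = 100
for _ in range(games):
    # Play_Random_Test returns the final reward; 1 is assumed to mean
    # the TD agent (player 1) won the game.
    if Play_Random_Test(model, env) == 1:
        wins += 1

print('TD agent won {} of {} games'.format(wins, games))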
Example #2
import copy

import torch

model.double()

model.load_state_dict(torch.load('model.pth'))

# AI plays 1
# You play -1
env.reset()
print('  0   1   2   3   4   5   6   7   8   9   10  11  12  13  14  15  16  17')
print('_________________________________________________________________________')
print(env.board)
print('_________________________________________________________________________')
print('  18  19  20  21  22  23  24  25  26  27')
print('\n')
# time.sleep(2)

TD = TDAgent(model)  # the model was already converted to double above

print('You play First')
action_dict = dict()
step = 1

player = -1
rolls = env.roll_dice()
fake_board = copy.deepcopy(env.board)
moves = env.all_possible_moves(player, fake_board, rolls)

for i, move in enumerate(moves):
    action_dict[i] = move

# print(action_dict)
# print(moves)
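
The snippet ends just before a move is applied. Going by the commented-out lines in Example #1, the human player is meant to pick an index from action_dict; a sketch of that missing step, reusing the names above:

# Sketch of the human move-selection step implied by Example #1.
print(action_dict)                       # show every legal move sequence
select = int(input('Select Action: '))   # the human picks an index
action = action_dict[select]

for a in action:
    # apply each sub-move of the chosen sequence to the real board
    reward, done = env.step(a, env.board, player)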
Example #3
    def train(self, iters):

        network = self

        TD = TDAgent(network)
        count = 0
        gen_count_new = 0
        gen_count_old = 0

        for eps in range(iters):
            env.reset()
            self.init_eligiblity_trace()

            rolls = env.roll_dice()
            player = env.random_player()

            env.player = player

            count += 1
            print("Calculating Weights: {}".format(count), end="\r")

            step = 0
            reward, done = 0, False  # defaults in case no move is applied

            # Save the model every 100 episodes
            if count % 100 == 0:
                torch.save(self.state_dict(), 'model.pth')

            # Play one test game against the fixed baseline each episode
            fake_1 = copy.deepcopy(env)
            winner_random = Play_Random_Test(self, fake_1)

            # if count%50 ==0:
            #   print('\n Random Agent Match Winner: ',winner_random)
            #   clear_output()

            while True:

                print('\t\t\t\t Working on Step: ', step, end="\r")
                step += 1

                features = env.board_features()
                features = torch.tensor(features)
                p = self.forward(features)
                fake_board = copy.deepcopy(env.board)
                actions = env.all_possible_moves(player, fake_board, rolls)
                # print(actions)
                if actions is not None:
                    fake = copy.deepcopy(env)
                    action, win_prob = TD.select_best_action(
                        actions, fake, player)
                    if len(action) != 0:
                        for a in action:
                            reward, done = env.step(a, env.board, player)
                features = env.board_features()
                features = torch.tensor(features)
                p_next = self.forward(features)

                if done:
                    loss = self.update_weights(p, reward)
                    # print(loss)
                    break
                else:
                    loss = self.update_weights(p, p_next)

                player = env.change_player(player)
                rolls = env.roll_dice()
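
train() leans on two helpers that are not shown in these examples: init_eligiblity_trace and update_weights. The sketch below is an assumption about what a TD(lambda) update with eligibility traces typically looks like for a network like this, not the project's actual code; LAMBDA and LR are hypothetical hyperparameters.

import torch

LAMBDA, LR = 0.7, 0.1  # assumed trace-decay and learning-rate values

# These two methods would live on the same class as train(); the bodies
# here are an illustrative guess, not the project's implementation.
def init_eligiblity_trace(self):
    # one zeroed eligibility trace per weight tensor
    self.eligibility_traces = [torch.zeros_like(w) for w in self.parameters()]

def update_weights(self, p, p_next):
    # classic TD(lambda): e <- lambda*e + grad(p), then w <- w + lr*delta*e
    self.zero_grad()
    p.backward()  # fills w.grad with dp/dw for every weight tensor
    with torch.no_grad():
        td_error = float(p_next) - float(p)  # delta; p_next may be the reward
        for w, trace in zip(self.parameters(), self.eligibility_traces):
            trace.mul_(LAMBDA).add_(w.grad)  # decay trace, accumulate grad
            w.add_(LR * td_error * trace)    # move weights along the trace
    return td_error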
Example #4
import copy

import numpy as np
import torch


def Play_Agent_VS_Old_Agent(env):

    model = Network()
    step = 0
    old_model = Network()

    model.double()
    old_model.double()

    print('Model_New_Main is 1')
    print('Model_colab_old is -1')

    model.load_state_dict(torch.load('model.pth'))

    old_model.load_state_dict(torch.load('model_weak.pth'))

    TD_new = TDAgent(model)

    TD_old = TDAgent(old_model)

    env.reset()
    player = np.random.choice([-1, 1])

    rolls = env.roll_dice()
    reward, done = 0, False  # defaults in case no move is applied

    while True:
        step += 1
        print('Player: ', player)
        print('Rolls: ', rolls)
        print(
            '  0   1   2   3   4   5   6   7   8   9   10  11  12  13  14  15  16  17'
        )
        print(
            '_________________________________________________________________________'
        )
        print(env.board)
        print(
            '_________________________________________________________________________'
        )
        print('  18  19  20  21  22  23  24  25  26  27')
        print('\n')

        fake_board = copy.deepcopy(env.board)
        actions = env.all_possible_moves(player, fake_board, rolls)

        if actions is not None:
            fake = copy.deepcopy(env)

            if player == 1:
                best_action, win_prob = TD_new.select_best_action(
                    actions, fake, player)

            elif player == -1:
                best_action, win_prob = TD_old.select_best_action(
                    actions, fake, player)

            if len(best_action) != 0:
                for a in best_action:
                    reward, done = env.step(a, env.board, player)

        if done:
            print('Won in %s moves!' % (step))
            winner = reward
            break

        player = env.change_player(player)
        rolls = env.roll_dice()

    return winner
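
A hedged driver for the head-to-head match. Backgammon is again a hypothetical environment class name, and the mapping of the final reward to a winner follows the two print statements at the top of the function.

env = Backgammon()   # hypothetical environment class, as above
winner = Play_Agent_VS_Old_Agent(env)

# Per the prints above, 1 is the new model and -1 the old one.
name = 'model.pth (new)' if winner == 1 else 'model_weak.pth (old)'
print('Winner:', name)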