def Play_Random_Test(model, env):
    env.reset()
    model.double()
    model.load_state_dict(torch.load('model.pth'))
    # AI plays 1
    # The random opponent plays -1
    TD = TDAgent(model)
    action_dict = dict()
    player = -1
    rolls = env.roll_dice()
    fake_board = copy.deepcopy(env.board)
    moves = env.all_possible_moves(player, fake_board, rolls)
    for i, move in enumerate(moves):
        action_dict[i] = move
    # Always take action 0
    action = action_dict[0]
    for a in action:
        reward, done = env.step(a, env.board, player)
    while True:
        # AI's turn
        player = env.change_player(player)
        rolls = env.roll_dice()
        fake_board = copy.deepcopy(env.board)
        actions = env.all_possible_moves(player, fake_board, rolls)
        if actions is not None:
            fake = copy.deepcopy(env)
            best_action, win_prob = TD.select_best_action(actions, fake, player)
            if len(best_action) != 0:
                for a in best_action:
                    reward, done = env.step(a, env.board, player)
        if done:
            winner = reward
            break
        # Random opponent's turn
        player = env.change_player(player)
        rolls = env.roll_dice()
        action_dict = dict()
        fake_board = copy.deepcopy(env.board)
        moves = env.all_possible_moves(player, fake_board, rolls)
        if len(moves) != 0:
            for i, move in enumerate(moves):
                action_dict[i] = move
            # print(action_dict)
            # select = int(input('Select Action:'))
            # Always take action 0
            # action = action_dict[select]
            action = action_dict[0]
            for a in action:
                reward, done = env.step(a, env.board, player)
        if done:
            winner = reward
            break
    return winner
model.double()
model.load_state_dict(torch.load('model.pth'))
# AI plays 1
# You play -1
env.reset()
print(' 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17')
print('_________________________________________________________________________')
print(env.board)
print('_________________________________________________________________________')
print(' 18 19 20 21 22 23 24 25 26 27')
print('\n')
# time.sleep(2)
TD = TDAgent(model)  # model is already .double() above
print('You play First')
action_dict = dict()
step = 1
player = -1
rolls = env.roll_dice()
fake_board = copy.deepcopy(env.board)
moves = env.all_possible_moves(player, fake_board, rolls)
for i, move in enumerate(moves):
    action_dict[i] = move
# print(action_dict)
# print(moves)
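# --- Hedged sketch: how this interactive cell presumably continues. Based on the
# commented-out input() call above, the human (player -1) picks a move index and the
# trained agent (player 1) replies via TD.select_best_action. The loop below is an
# assumption about the intended flow, not the author's original code; it only reuses
# the env / TD calls already shown in this notebook.
while True:
    print(action_dict)
    select = int(input('Select Action: '))
    action = action_dict[select]
    for a in action:
        reward, done = env.step(a, env.board, player)
    if done:
        print('Game over. Winner:', reward)
        break

    # Agent's turn
    player = env.change_player(player)
    rolls = env.roll_dice()
    actions = env.all_possible_moves(player, copy.deepcopy(env.board), rolls)
    if actions is not None:
        best_action, win_prob = TD.select_best_action(actions, copy.deepcopy(env), player)
        for a in best_action:
            reward, done = env.step(a, env.board, player)
    if done:
        print('Game over. Winner:', reward)
        break

    # Back to the human: re-roll and re-enumerate the legal moves
    player = env.change_player(player)
    rolls = env.roll_dice()
    action_dict = dict()
    moves = env.all_possible_moves(player, copy.deepcopy(env.board), rolls)
    for i, move in enumerate(moves):
        action_dict[i] = move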
def train(self, iters):
    network = self
    TD = TDAgent(network)
    count = 0
    gen_count_new = 0
    gen_count_old = 0
    for eps in range(iters):
        env.reset()
        self.init_eligiblity_trace()
        rolls = env.roll_dice()
        player = env.random_player()
        env.player = player
        count += 1
        print("Calculating Weights: {:.5f}".format(count), end="\r")
        step = 0
        # Save the model every 100 episodes
        if count % 100 == 0:
            torch.save(self.state_dict(), 'model.pth')
            # See who wins against the random opponent every 100 episodes
            fake_1 = copy.deepcopy(env)
            winner_random = Play_Random_Test(self, fake_1)
            # if count % 50 == 0:
            #     print('\n Random Agent Match Winner: ', winner_random)
            #     clear_output()
        while True:
            print('\t\t\t\t Working on Step: ', step, end="\r")
            step += 1
            features = env.board_features()
            features = torch.tensor(features)
            p = self.forward(features)
            fake_board = copy.deepcopy(env.board)
            actions = env.all_possible_moves(player, fake_board, rolls)
            # print(actions)
            if actions is not None:
                fake = copy.deepcopy(env)
                action, win_prob = TD.select_best_action(actions, fake, player)
                if len(action) != 0:
                    for a in action:
                        reward, done = env.step(a, env.board, player)
            features = env.board_features()
            features = torch.tensor(features)
            p_next = self.forward(features)
            if done:
                loss = self.update_weights(p, reward)
                # print(loss)
                break
            else:
                loss = self.update_weights(p, p_next)
            player = env.change_player(player)
            rolls = env.roll_dice()
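# --- Hedged sketch: train() above relies on two Network methods defined elsewhere,
# init_eligiblity_trace() and update_weights(). The class below only illustrates the
# standard TD(lambda) eligibility-trace update
# (z <- lambda*z + grad p ; w <- w + alpha*(p_next - p)*z). The layer sizes, lr and
# lamda values are placeholders, not necessarily what the real Network uses.
import torch
import torch.nn as nn

class TDLambdaSketch(nn.Module):
    def __init__(self, input_size=198, hidden_size=40, lr=0.1, lamda=0.7):
        super().__init__()
        self.lr = lr
        self.lamda = lamda
        self.hidden = nn.Sequential(nn.Linear(input_size, hidden_size), nn.Sigmoid())
        self.output = nn.Sequential(nn.Linear(hidden_size, 1), nn.Sigmoid())
        self.eligibility_traces = None

    def forward(self, x):
        return self.output(self.hidden(x))

    def init_eligiblity_trace(self):
        # One trace tensor per weight tensor, reset at the start of every episode
        self.eligibility_traces = [torch.zeros(w.shape) for w in self.parameters()]

    def update_weights(self, p, p_next):
        # TD error uses the next prediction (or the terminal reward) as the target
        self.zero_grad()
        p.backward()
        with torch.no_grad():
            td_error = p_next - p
            for i, weights in enumerate(self.parameters()):
                self.eligibility_traces[i] = (self.lamda * self.eligibility_traces[i]
                                              + weights.grad)
                weights.add_(self.lr * td_error * self.eligibility_traces[i])
        return td_error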
def Play_Agent_VS_Old_Agent(env):
    model = Network()
    step = 0
    old_model = Network()
    model.double()
    old_model.double()
    print('Model_New_Main is 1')
    print('Model_colab_old is -1')
    model.load_state_dict(torch.load('model.pth'))
    old_model.load_state_dict(torch.load('model_weak.pth'))
    TD_new = TDAgent(model)
    TD_old = TDAgent(old_model)
    env.reset()
    player = np.random.choice([-1, 1])
    rolls = env.roll_dice()
    while True:
        step += 1
        print('Player: ', player)
        print('Rolls: ', rolls)
        print(' 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17')
        print('_________________________________________________________________________')
        print(env.board)
        print('_________________________________________________________________________')
        print(' 18 19 20 21 22 23 24 25 26 27')
        print('\n')
        fake_board = copy.deepcopy(env.board)
        actions = env.all_possible_moves(player, fake_board, rolls)
        if actions is not None:
            fake = copy.deepcopy(env)
            if player == 1:
                best_action, win_prob = TD_new.select_best_action(actions, fake, player)
            elif player == -1:
                best_action, win_prob = TD_old.select_best_action(actions, fake, player)
            if len(best_action) != 0:
                for a in best_action:
                    reward, done = env.step(a, env.board, player)
        if done:
            print('Won in %s moves!' % (step))
            winner = reward
            break
        player = env.change_player(player)
        rolls = env.roll_dice()
    return winner
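# --- Hedged sketch: a small driver for the head-to-head evaluation above. It assumes
# the terminal reward returned by the env equals the id of the winning player
# (1 for the new model, -1 for the old one); the game count is arbitrary.
def compare_agents(env, games=20):
    new_wins = 0
    for g in range(games):
        winner = Play_Agent_VS_Old_Agent(env)  # env is reset inside, so reuse is fine
        if winner == 1:
            new_wins += 1
    print('New model won {} of {} games'.format(new_wins, games))
    return new_wins / games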