def test(mainQN, sess):
    """Evaluate a trained DQN (player 1) against a random opponent (player 2).

    Restores the latest checkpoint from ``chk/dqvrand``, plays up to 1000
    half-moves, and reports statistics computed by ``GameRate``.

    Parameters
    ----------
    mainQN : Q-network exposing an ``inputs_`` placeholder and an ``output``
        tensor of per-cell Q-values.
    sess : TensorFlow session used to run the network.

    Returns
    -------
    tuple(float, float, float)
        ``(win, comp, bloc)`` — win percentage, immediate-completion rate,
        and block rate for player 1.
    """
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint('chk/dqvrand'))
    e = 0  # counts half-moves (loop iterations), not episodes
    logs = []
    wins = []
    game = Game(verbose=False)
    while e <= 1000:
        e += 1
        if not game.game_over:
            state = game.space
            feed = {mainQN.inputs_: state.reshape((1, *state.shape))}
            As = sess.run(mainQN.output, feed_dict=feed)
            # Greedy action restricted to the currently available cells.
            avail = game.avail()
            availQ = {k: As[0][k] for k in avail}
            action = max(availQ, key=availQ.get)
            game.move(action, 1)
            game.step(1)
            if game.game_over:
                wins.append(game.game_over)
                log = game.setup()
                logs.append(log)
                continue
        # Random opponent's reply.
        move = game.random_space()
        game.move(move, 2)
        game.step(2)
        # BUGFIX: a game ended by player 2's move was previously never
        # recorded nor reset, so later iterations kept playing random moves
        # on a finished board and player-2 endings were missing from `wins`.
        if game.game_over:
            wins.append(game.game_over)
            logs.append(game.setup())
    win, comp, bloc = 0, 0, 0
    c = Counter(wins)
    r = GameRate(verbose=False, list=logs, player=1, opponent=2)
    r.check_games()
    # BUGFIX: guard against ZeroDivisionError when no game finished.
    win = c[1] / len(wins) if wins else 0.0
    print("win percentage", win)
    if (r.completions + r.missed_completions) > 0:
        comp = r.completions / (r.completions + r.missed_completions)
        print("immediate completions", comp)
    if (r.blocks + r.missed_blocks) > 0:
        bloc = r.blocks / (r.blocks + r.missed_blocks)
        print("blocks", bloc)
    if win == 0.0:
        # Preserved debugging behaviour: abort the whole run on a total loss.
        print(wins)
        exit(1)
    return win, comp, bloc
# Training loop: epsilon-greedy DQN self-play against the game environment.
# NOTE(review): this block is truncated in the source — the final print(...)
# call is cut off mid-statement; the missing tail must be recovered before
# this file can run. Structure below is reconstructed from the one-line
# original; confirm nesting against the upstream version.
# Relies on names defined elsewhere: game, state, step, loss, explore_*,
# decay_rate, max_steps, train_episodes, mainQN, sess — TODO confirm.
for ep in range(1, train_episodes):
    total_reward = 0
    t = 0
    while t < max_steps:
        if not game.game_over:
            step += 1
            # Exploration probability decays exponentially with global step.
            explore_p = explore_stop + (explore_start - explore_stop) * np.exp(-decay_rate * step)
            if explore_p > np.random.rand():
                # Make a random action
                action = game.random_space()
            else:
                # Get action from Q-network
                feed = {mainQN.inputs_: state.reshape((1, *state.shape))}
                Qs = sess.run(mainQN.output, feed_dict=feed)
                #print(Qs)
                # Greedy choice restricted to available cells only.
                avail = game.avail()
                availQ = {}
                for i in avail:
                    availQ[i] = Qs[0][i]
                action = max(availQ, key=availQ.get)
            game.move(action, 1)
            next_state, reward = game.step()
            total_reward += reward
            if game.game_over:
                # the episode ends so no next state
                next_state = np.zeros(state.shape)
                t = max_steps  # force the while-loop to terminate
                if loss:
                    # NOTE(review): source truncated here — print call is incomplete.
                    print('Episode: {}'.format(ep), 'Total reward: {}'.format(total_reward),
def test(sess, actor1, actor2):
    """Play TEST_EPISODES games of actor1 (player 1) versus actor2 (player 2)
    and report win / immediate-completion / block statistics for both sides.

    NOTE(review): after player 2's move the code calls ``game.step(1)`` rather
    than ``game.step(2)`` — possibly a bug, kept as-is; confirm against
    ``Game.step``'s contract.

    Returns ``(win_p1, comp1, bloc1, win_p2, comp2, bloc2)``.
    """
    game = Game(verbose=False)
    logs, wins = [], []
    for _episode in range(TEST_EPISODES):
        game.setup()
        s = game.space
        terminal = False
        for _turn in range(MAX_EP_STEPS):
            if not terminal:
                # Player 1: greedy over actor1's scores for the free cells.
                a = actor1.predict(np.reshape(game.space, (1, *s.shape)))
                q_avail = {cell: a[0][cell] for cell in game.avail()}
                action = max(q_avail, key=q_avail.get)
                game.move(action, 1)
                s2, r = game.step(1)
                terminal = game.game_over
                info = None
                if terminal:
                    # Record the winner, reset the board, keep the game log.
                    wins.append(game.game_over)
                    logs.append(game.setup())
                    s = game.space
                    break
                # Player 2's reply via actor2, same greedy selection.
                a = actor2.predict(np.reshape(game.space, (1, *s.shape)))
                q_avail = {cell: a[0][cell] for cell in game.avail()}
                action = max(q_avail, key=q_avail.get)
                game.move(action, 2)
                s2, r = game.step(1)
                terminal = game.game_over
                info = None
    c = Counter(wins)
    rate1 = GameRate(verbose=False, list=logs, player=1, opponent=2)
    rate2 = GameRate(verbose=False, list=logs, player=2, opponent=1)
    bloc1, bloc2 = 0, 0
    rate1.check_games()
    rate2.check_games()
    win_p1 = c[1] / (TEST_EPISODES - 1)
    print("1win percentage", win_p1)
    denom1 = rate1.completions + rate1.missed_completions
    comp1 = rate1.completions / denom1 if denom1 > 0 else 0
    print("1immediate completions", comp1)
    if rate1.blocks + rate1.missed_blocks > 0:
        bloc1 = rate1.blocks / (rate1.blocks + rate1.missed_blocks)
    win_p2 = c[2] / (TEST_EPISODES - 1)
    print("2win percentage", win_p2)
    denom2 = rate2.completions + rate2.missed_completions
    comp2 = rate2.completions / denom2 if denom2 > 0 else 0
    print("2immediate completions", comp2)
    if rate2.blocks + rate2.missed_blocks > 0:
        bloc2 = rate2.blocks / (rate2.blocks + rate2.missed_blocks)
    return win_p1, comp1, bloc1, win_p2, comp2, bloc2