import random

import numpy as np

# play_a_word, get_immediate_rewards, make_new_topic, and the agent classes
# (PGACAgent, ActorCriticAgent) are assumed to be defined elsewhere in the project.


def test_on_words(words, giver, guessers, to_print=False):
    """Evaluate the giver against randomly chosen guessers on a list of words.

    Returns the win rate over all words and the average number of turns
    taken on the games that were won.
    """
    all_turns, all_winloses = [], []
    random.seed(90)
    for word in words:
        guesser = random.choice(guessers)
        try:
            finished_turn, winlose, clues, guesses = play_a_word(
                word, giver, guesser, to_train=False)
        except ValueError:
            print("ERROR ", word)
            continue
        if winlose:
            all_turns.append(finished_turn)
        all_winloses.append(winlose)
        if to_print:
            print("Word: {}".format(word))
            print("Clues: {}".format(clues))
            print("Guesses: {}".format(guesses))
            print("Win Or Lose {}".format(winlose))
            # print(TO_PRINT_ATTENTION)
    win_rate = np.array(all_winloses).mean()
    if win_rate:
        avg_turns = np.array(all_turns).mean()
    else:
        avg_turns = 0
    print("Play with Topic Guessers, win_rate {:.2f}, avg_turn {:.2f}".format(
        win_rate, avg_turns))
    return win_rate, avg_turns
def add_experience(giver, guesser, words):
    # Variant: play one game on a random word and store the episode in the
    # guesser's replay memory only.
    word = random.choice(words)
    try:
        _, winlose, clues, guesses = play_a_word(word, giver, guesser)
    except ValueError:
        return
    rewards = get_immediate_rewards(guesser, clues, guesses)
    guesser.update_memory((clues, guesses, winlose, rewards))
def add_experience(giver, guesser, words):
    # Variant: store the episode in the giver's memory only, with rewards
    # computed against the guesser's topics, when the giver is a PGACAgent.
    word = random.choice(words)
    try:
        _, winlose, clues, guesses = play_a_word(word, giver, guesser)
    except ValueError:
        return
    if isinstance(giver, PGACAgent):
        rewards = get_immediate_rewards(giver, clues, guesses,
                                        topics=guesser.topics)
        giver.update_memory((word, clues, guesses, winlose, rewards))
def add_experience(giver, guesser, words):
    # Variant: assign the guesser a fresh topic for the target word, then
    # store the episode in the giver's memory with a 3x win/lose signal.
    word = random.choice(words)
    topic = make_new_topic(word, giver.strength_dict)
    guesser.set_topic(topic)
    try:
        _, winlose, clues, guesses = play_a_word(word, giver, guesser)
    except ValueError:
        return
    if isinstance(giver, PGACAgent):
        rewards = get_immediate_rewards(giver, clues, guesses)
        giver.update_memory((word, clues, guesses, 3 * winlose, rewards))
def add_experience(giver, guesser, words):
    # Variant: play a training game and store the episode in both the
    # guesser's and the giver's memories when they are ActorCriticAgents.
    word = random.choice(words)
    try:
        _, winlose, clues, guesses = play_a_word(word, giver, guesser,
                                                 to_train=True)
        print(winlose, clues, guesses)
    except ValueError:
        return
    if isinstance(guesser, ActorCriticAgent):
        rewards = get_immediate_rewards(guesser, clues, guesses)
        guesser.update_memory((clues, guesses, winlose, rewards))
    if isinstance(giver, ActorCriticAgent):
        rewards = get_immediate_rewards(giver, clues, guesses)
        giver.update_memory((word, clues, guesses, winlose, rewards))
def get_two_trace(target, giver):
    # Collect two clue traces for the same target word that led to distinct
    # guess sequences. Relies on a module-level `guessers` list.
    traces = []
    guesses_set = set()
    for _ in range(1000):
        guesser = random.choice(guessers)
        _, is_win, clues, guesses = play_a_word(target, giver, guesser,
                                                to_train=False)
        print(is_win, clues, guesses)
        guesses = tuple(guesses)
        if guesses not in guesses_set:
            guesses_set.add(guesses)
            traces.append(clues)
        if len(traces) == 2:
            break
    return traces
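

# A minimal usage sketch (an assumption, not part of the original source): one
# plausible way to combine the helpers above into a training loop, collecting
# self-play experience with add_experience and evaluating periodically with
# test_on_words. The arguments (giver, guessers, train_words, test_words) and
# the schedule below are hypothetical placeholders supplied by the caller.
def run_training_loop(giver, guessers, train_words, test_words,
                      n_episodes=10000, eval_every=1000):
    for episode in range(n_episodes):
        # One self-play episode on a random training word.
        guesser = random.choice(guessers)
        add_experience(giver, guesser, train_words)
        # Periodic evaluation on the held-out word list.
        if (episode + 1) % eval_every == 0:
            test_on_words(test_words, giver, guessers)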