Code Example #1
def benchmark_test(primary_model,
                   benchmark_model,
                   num_games,
                   benchmark_bot_name='benchmark_bot',
                   run_id=None):
    epsilon = Epsilon()
    player_1 = Agent(name=cfg.bot_1_name, model=primary_model, epsilon=epsilon)
    player_2 = Agent(name=benchmark_bot_name,
                     model=benchmark_model,
                     epsilon=epsilon)

    # Put any learned policy networks into evaluation mode before benchmarking.
    if hasattr(player_1.model, 'policy_net'):
        player_1.model.policy_net.eval()

    if hasattr(player_2.model, 'policy_net'):
        player_2.model.policy_net.eval()

    game_output = []
    for j in range(num_games):
        player_list = [player_1, player_2]
        game = Game(name="pinochle",
                    players=player_list,
                    run_id=run_id,
                    current_cycle=None)
        game.deal()
        game_output.append(game.play())

    winner_list, exp_df = pu.parse_game_output(game_output=game_output)

    if run_id is not None:  # Store history
        db.upload_exp(df=exp_df)

    # winner_list holds the winning player's index per game (0 = primary model),
    # so this is the primary model's win rate.
    return 1 - sum(winner_list) / len(winner_list)
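A minimal usage sketch (not from the source): assuming model_1 is a trained model as in Code Example #9 and RandomBot is the baseline bot used in the other examples, the return value is the primary model's win rate.

win_rate = benchmark_test(primary_model=model_1,
                          benchmark_model=RandomBot(),
                          num_games=100)
print(f'Win rate vs. random bot: {win_rate * 100:.1f}%')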
Code Example #2
def trainAgent(tries, episodes, scenario, teacherAgent=None, feedbackStrategy=0, feedbackParameter=0.0):
    # A teacher agent switches the run from plain RL to interactive RL (IRL).
    if teacherAgent is None:
        filenameSteps = resultsFolder + 'stepsRL.csv'
        filenameRewards = resultsFolder + 'rewardsRL.csv'
        filenameFailures = resultsFolder + 'failuresRL.csv'
    else:
        filenameSteps = resultsFolder + 'stepsIRL.csv'
        filenameRewards = resultsFolder + 'rewardsIRL.csv'
        filenameFailures = resultsFolder + 'failuresIRL.csv'

    files.createFile(filenameSteps)
    files.createFile(filenameRewards)
    files.createFile(filenameFailures)

    for i in range(tries):
        print('Training agent number: ' + str(i + 1))
        agent = Agent(scenario)
        [steps, rewards, failures] = agent.train(episodes, teacherAgent, feedbackStrategy, feedbackParameter)

        files.addToFile(filenameSteps, steps)
        files.addFloatToFile(filenameRewards, rewards)
        files.addToFile(filenameFailures, failures)
    # endfor

    return agent
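A hedged usage sketch: a plain RL run followed by an interactive run that reuses the trained agent as a teacher. The feedback values are illustrative, not from the source, and note that only the last trained agent is returned.

# Plain RL (writes stepsRL.csv etc.), then IRL with the result as teacher.
rl_agent = trainAgent(tries=5, episodes=1000, scenario=scenario)
irl_agent = trainAgent(tries=5, episodes=1000, scenario=scenario,
                       teacherAgent=rl_agent,
                       feedbackStrategy=1, feedbackParameter=0.5)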
Code Example #3
File: dla.py Project: nclslbrn/blender_script
def initAgents():
    # Spawn agentNum - 1 walkers and place each on the current diffusion
    # radius; relies on the module-level agents list and size constants.
    for a in range(agentNum - 1):
        newAgent = Agent(size=agentSize, x=0, y=0, z=0)
        newAgent.onRadius(
            size=agentSize,
            limit=diffusionLimit
        )
        agents.append(newAgent)
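initAgents depends entirely on module-level state. A sketch of the globals it assumes, with made-up values:

# Illustrative globals (values are assumptions, not from the project).
agentNum = 50          # walker count; the loop spawns agentNum - 1
agentSize = 1.0        # initial walker size
diffusionLimit = 10.0  # radius on which walkers are placed
agents = []            # shared list mutated by initAgents

initAgents()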
Code Example #4
def trainAgent(tries, episodes, scenario, explorationstrat, learning_alg, alpha, gamma, epsilon, sigma, delta, tau, suffix):

    filenameSteps = resultsFolder + 'stepsRL_' + str(suffix) + '.csv'
    filenameRewards = resultsFolder + 'rewardsRL_' + str(suffix) + '.csv'

    files.createFile(filenameSteps)
    files.createFile(filenameRewards)

    for i in range(tries):
        print('Training agent number: ' + str(i + 1))
        agent = Agent(scenario, alpha, gamma, epsilon, sigma, delta, tau)
        [steps, rewards] = agent.train(episodes, explorationstrat, learning_alg)

        files.addToFile(filenameSteps, steps)
        files.addFloatToFile(filenameRewards, rewards)
    # endfor

    return agent
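As in Code Example #2, each try appends its metrics to the suffixed CSVs and only the final agent is returned. An illustrative call; the strategy and algorithm names and all hyperparameter values are assumptions:

agent = trainAgent(tries=10, episodes=500, scenario=scenario,
                   explorationstrat='softmax', learning_alg='qlearning',
                   alpha=0.1, gamma=0.95, epsilon=0.1,
                   sigma=0.5, delta=0.01, tau=1.0, suffix='run1')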
Code Example #5
def human_test(model):
    epsilon = Epsilon()
    player_1 = Agent(name=cfg.bot_1_name, model=model, epsilon=epsilon)
    player_2 = Human("YOU")

    if 'policy_net' in dir(model):
        model.policy_net.eval()

    # Set logging level to debug
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info("Human test enabled, initializing AI uprising...")

    # Initialize game
    player_list = [player_1, player_2]
    game = Game(name="pinochle",
                players=player_list,
                run_id=None,
                current_cycle=None,
                human_test=True)
    game.deal()
    game.play()

    # Set logging level back to config
    logging.getLogger().setLevel(cfg.logging_level)
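A sketch of wiring this up: constructing the model mirrors Code Example #9 (cfg.DQN_params is assumed to exist on the cfg module used above), and the load call is an assumption mirroring model_1.save there.

model = DQN(run_id='TEST', **cfg.DQN_params)
# model.load(title='TEST') is assumed to mirror model_1.save(title=...).
human_test(model)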
Code Example #6
File: dla.py Project: nclslbrn/blender_script
                            # add the agent to the tree
                            tree.append(agents[a])
                            writer.writerow([
                                agents[a].size,
                                agents[a].x,
                                agents[a].y,
                                agents[a].z
                            ])
                            # change constants
                            if agentSize > minAgentSize:
                                agentSize *= shrink

                            # reset the agent
                            del agents[a]
                            newAgent = Agent(x=0, y=0, z=0, size=agentSize)
                            newAgent.onRadius(
                                limit=diffusionLimit,
                                size=agentSize
                            )
                            agents.append(newAgent)

                        # Grow the diffusion radius as the aggregate approaches it.
                        if diffusionLimit < maxDiffusionDistance:

                            if abs(tree[c].x) >= diffusionLimit*0.99:
                                diffusionLimit = round(abs(tree[c].x)*1.025, 1)

                            if abs(tree[c].y) >= diffusionLimit*0.99:
                                diffusionLimit = round(abs(tree[c].y)*1.025, 1)

                            if abs(tree[c].z) >= diffusionLimit*0.99:
                                diffusionLimit = round(abs(tree[c].z)*1.025, 1)
Code Example #7
File: main.py Project: Atrus619/DeckOfCards
#
# self.train_self(num_epochs=5,
#                 exp_gen=exp_gen,
#                 is_storing_history=True)

# states = data.state.to(self.device)
# actions = data.action.to(self.device)
# meld_actions = data.meld_action.to(self.device)
# next_states = data.next_state.to(self.device)
# rewards = data.reward.to(self.device)
#
# pu.train_model(model=self, config=cfg)

epsilon = Epsilon('eval')
player_1 = Agent(name=cfg.random_bot_name + '1',
                 model=RandomBot(),
                 epsilon=epsilon)
player_2 = Agent(name=cfg.random_bot_name + '2',
                 model=RandomBot(),
                 epsilon=epsilon)
player_human = Human(name='Me, no you?')  # defined but not added to player_list below

player_list = [player_1, player_2]

game = Game(name=cfg.game,
            players=player_list,
            run_id='TEST',
            current_cycle=None)
game.deal()
winner_index, exp_df = game.play()
db.upload_exp(df=exp_df)
Code Example #8
def round_robin(model_list,
                num_games,
                verbose=True,
                plot=True,
                device='cuda:0'):
    start_time = time.time()
    epsilon = Epsilon()  # instantiate the schedule; the Agents below only need names

    model_wins = OrderedDict()
    for i, model in enumerate(model_list):
        model_wins[f'Player {i}'] = [0, model]
        if 'device' in dir(model) and model.device != device:
            model.policy_net = model.policy_net.to(device)

    for i, p1_model in enumerate(model_list):
        for j, p2_model in enumerate(model_list):
            if i < j:
                round_start_time = time.time()

                p1 = Agent(name=f'Player {i}', model=p1_model, epsilon=epsilon)
                p2 = Agent(name=f'Player {j}', model=p2_model, epsilon=epsilon)

                if verbose:
                    print(f'Player {i} vs. Player {j}...')

                p1_wins = int(
                    benchmark_test(primary_model=p1_model,
                                   benchmark_model=p2_model,
                                   num_games=num_games) * num_games)
                p2_wins = int(num_games - p1_wins)

                if verbose:
                    print(
                        f'Player {i}: {p1_wins}\tPlayer {j}: {p2_wins}\tDuration: {util.get_pretty_time(time.time() - round_start_time)}'
                    )
                    print(cs.DIVIDER)

                model_wins[p1.name][0] += p1_wins
                model_wins[p2.name][0] += p2_wins

    output = sorted(model_wins.items(), key=lambda kv: kv[1][0], reverse=True)

    if verbose:
        for i, model in enumerate(output):
            print(f'Rank {i+1}: {model[0]} with {model[1][0]} wins')
        total_games = len(model_list) * (len(model_list) - 1) // 2 * num_games
        total_duration = time.time() - start_time
        avg_time_per_game = total_duration / total_games
        print(
            f'{total_games} total games played over {util.get_pretty_time(total_duration)} ({util.get_pretty_time(avg_time_per_game)} per game)'
        )

    if plot:
        xs = [x[0] for x in model_wins.items()]
        heights = [x[1][0] for x in model_wins.items()]
        plt.bar(height=heights, x=xs)
        plt.title('Round Robin Tournament Results')
        plt.xlabel('Model')
        plt.ylabel('Total Number of Wins')
        plt.show()

    return output
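A hedged call: two trained models (assumed to exist) plus the RandomBot baseline. The result is a list of (name, [wins, model]) pairs sorted by wins, so standings[0] is the tournament winner.

models = [model_1, model_2, RandomBot()]  # trained models are assumptions
standings = round_robin(model_list=models, num_games=50, plot=False)
best_name, (best_wins, best_model) = standings[0]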
Code Example #9
File: experiment.py Project: Atrus619/DeckOfCards
def run_full_experiment(config):
    # Archive and clear old experience before starting a fresh run
    db.archive_exp(db.get_all_exp())
    db.delete_all_exp()

    util.setup_file_logger(name=config.run_id, filename=config.run_id)
    logger = logging.getLogger(config.run_id)
    start_time = time.time()

    # Define players
    model_1 = DQN(run_id=config.run_id, **config.DQN_params)
    model_2 = model_1.copy()
    epsilon = Epsilon(epsilon_func=config.epsilon_func,
                      max_epsilon=config.max_epsilon,
                      min_epsilon=config.min_epsilon,
                      eval_epsilon=config.eval_epsilon,
                      num_cycles=config.num_cycles,
                      decrement=config.epsilon_decrement)

    player_list = [
        Agent(name=config.bot_1_name, model=model_1, epsilon=epsilon),
        Agent(name=config.bot_2_name, model=model_2, epsilon=epsilon)
    ]

    winner_list = []
    previous_experience_id = 0

    util.save_config(config=config, path=config.run_id)

    # For each cycle
    logger.info('Beginning run titled: ' + config.run_id)
    logger.info(cs.DIVIDER)

    for i in range(1, config.num_cycles + 1):
        # For each episode, play through episode and insert each state/action pair into the database
        logger.info('Beginning cycle: ' + str(i) + ' / ' +
                    str(config.num_cycles) + '\tCumulative Time Elapsed: ' +
                    util.get_pretty_time(time.time() - start_time))
        logger.info(
            f'Current Epsilon: {epsilon.get_epsilon(current_cycle=i):.3f}')
        cycle_start_time = time.time()

        # Async parallelization. Consider cpu_count - 1 so the user can keep working while it runs. Currently slow, likely due to memory copying.
        # with mp.Pool(mp.cpu_count() - 1) as pool:
        #     game_output = pool.starmap_async(parallel.play_game, [(config.game, player_list, config.run_id, i) for j in range(config.episodes_per_cycle)]).get()

        # Old serial method
        winner_list += pu.play_games(num_games=config.episodes_per_cycle,
                                     name=config.game,
                                     players=player_list,
                                     run_id=config.run_id,
                                     current_cycle=i,
                                     config=config)

        logger.info('Data collection complete.\tTotal Episode Time: ' +
                    util.get_pretty_time(time.time() - cycle_start_time))
        logger.info('Loading experience and training model...')
        training_start_time = time.time()

        # Import data from database based on experience replay buffer and train model
        pu.train_model(model=model_1, config=config)

        logger.info('Model training complete.\tTotal Training Time: ' +
                    util.get_pretty_time(time.time() - training_start_time))

        # Update model_2
        if i % config.player_2_update_freq == 0:
            logger.info(cs.DIVIDER)
            logger.info(
                'Storing history and setting model 2 equal to model 1...')
            player_list[0].model.policy_net.store_history()
            player_list[1].set_model(model=model_1.copy())

        # Benchmark
        if i % config.benchmark_freq == 0:
            logger.info(cs.DIVIDER)
            logger.info('Benchmarking...')

            # Player 1's win rate against player 2 over the games since the last benchmark
            benchmark_cycle_win_rate = 1 - sum(winner_list) / len(winner_list)
            winner_list = []  # Reset winner list

            # Play against random bot and measure win rate
            random_win_rate = benchmark.benchmark_test(
                primary_model=model_1,
                benchmark_model=RandomBot(),
                benchmark_bot_name=config.random_bot_name,
                num_games=config.random_bot_cycles,
                run_id=config.run_id if config.log_random_benchmark else None)
            logger.info(
                f'Winrate vs. Random Bot: {random_win_rate * 100:.1f}%')

            # Play against expert policy bot and measure win rate
            # expert_policy_win_rate = benchmark.benchmark_test(primary_model=model_1, benchmark_model=ExpertPolicy(), benchmark_bot_name=config.expert_policy_bot_name,
            #                                                   num_games=config.random_bot_cycles, run_id=config.run_id if config.log_expert_policy_benchmark else None)
            # logger.info(f'Winrate vs. Expert Policy: {expert_policy_win_rate * 100:.1f}%')

            # Collect average reward from database
            average_reward = benchmark.get_average_reward(
                run_id=config.run_id,
                previous_experience_id=previous_experience_id,
                agent_id=config.bot_1_name,
                opponent_id=config.bot_2_name)
            db.insert_metrics(run_id=config.run_id,
                              win_rate=benchmark_cycle_win_rate,
                              win_rate_random=random_win_rate,
                              win_rate_expert_policy=0.0,
                              average_reward=average_reward)

            previous_experience_id = db.get_max_id(config.run_id)

        # Checkpoint
        if config.checkpoint_freq is not None and i % config.checkpoint_freq == 0:
            logger.info(cs.DIVIDER)
            logger.info('Model checkpoint reached. Saving checkpoint...')
            model_1.save(folder=os.path.join(config.checkpoint_folder,
                                             config.run_id),
                         title=util.get_checkpoint_model_name(cycle=i))

        logger.info('Cycle complete.\tTotal Cycle Time: ' +
                    util.get_pretty_time(time.time() - cycle_start_time))
        logger.info(cs.DIVIDER)

    logger.info('Training complete.\tTotal Run Time: ' +
                util.get_pretty_time(time.time() - start_time) +
                '\tSaving model and exiting...')
    model_1.save(title=config.run_id)
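A minimal driver, assuming a config module shaped like the one read above (run_id, DQN_params, the epsilon settings, and the cycle, benchmark, and checkpoint frequencies):

# Hypothetical entry point; the config module name is an assumption.
import config
run_full_experiment(config=config)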