Python evaluate_vs_gnubg 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: gnubg.gnubg_backgammon

메소드/함수: evaluate_vs_gnubg

hotexamples.com에서의 예제들: 3

Python evaluate_vs_gnubg - 3개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 gnubg.gnubg_backgammon.evaluate_vs_gnubg에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

def args_gnubg(args):
    model_agent0 = args.model_agent0
    model_type = args.type
    hidden_units_agent0 = args.hidden_units_agent0
    n_episodes = args.episodes
    host = args.host
    port = args.port
    difficulty = args.difficulty

    if path_exists(model_agent0):
        # assert os.path.exists(model_agent0), print("The path {} doesn't exists".format(model_agent0))
        if model_type == 'nn':
            net0 = TDGammon(hidden_units=hidden_units_agent0,
                            lr=0.1,
                            lamda=None,
                            init_weights=False)
        else:
            net0 = TDGammonCNN(lr=0.0001)

        net0.load(checkpoint_path=model_agent0,
                  optimizer=None,
                  eligibility_traces=False)

        gnubg_interface = GnubgInterface(host=host, port=port)
        gnubg_env = GnubgEnv(gnubg_interface,
                             difficulty=difficulty,
                             model_type=model_type)
        evaluate_vs_gnubg(agent=TDAgentGNU(WHITE,
                                           net=net0,
                                           gnubg_interface=gnubg_interface),
                          env=gnubg_env,
                          n_episodes=n_episodes)

예제 #2

파일 보기

def args_gnubg(args):
    model_agent0 = args.model_agent0
    model_type = args.type
    hidden_units_agent0 = args.hidden_units_agent0
    n_episodes = args.episodes
    host = args.host
    port = args.port
    difficulty = args.difficulty
    iterations = args.iterations

    experiment = "/saved_models/" + model_agent0
    folder = os.getcwd() + experiment
    directory = os.fsencode(folder)

    max_it_sizes = 0
    for file in os.listdir(directory):
        filename = os.fsdecode(file)

        # Find by chosen iteration amount
        if iterations:
            if filename.endswith("{}.tar".format(str(iterations))):
                final_file = filename
                break

        # Otherwise the biggest iteration amount
        else:
            if filename.endswith(".tar"):
                size = filename.split('_')[-1][:-4]
                if int(size) > max_it_sizes:
                    max_it_sizes = int(size)

        if filename.endswith("{}.tar".format(str(max_it_sizes))):
            final_file = filename

    if path_exists(folder + '/' + final_file):
        # assert os.path.exists(model_agent0), print("The path {} doesn't exists".format(model_agent0))
        if model_type == 'nn':
            net0 = TDGammon(hidden_units=hidden_units_agent0,
                            lr=0.1,
                            lamda=None,
                            init_weights=False)
        else:
            net0 = TDGammonCNN(lr=0.0001)

        net0.load(checkpoint_path=folder + '/' + final_file,
                  optimizer=None,
                  eligibility_traces=False)

        gnubg_interface = GnubgInterface(host=host, port=port)
        gnubg_env = GnubgEnv(gnubg_interface,
                             difficulty=difficulty,
                             model_type=model_type)
        evaluate_vs_gnubg(agent=TDAgentGNU(WHITE,
                                           net=net0,
                                           gnubg_interface=gnubg_interface),
                          env=gnubg_env,
                          n_episodes=n_episodes,
                          difficulty=difficulty,
                          model=model_agent0)

예제 #3

파일 보기

def args_plot(args, parser):
    '''
    This method is used to plot the number of time an agent wins when it plays against an opponent.
    Instead of evaluating the agent during training (it can require some time and slow down the training), I decided to plot the wins separately, loading the different
    model saved during training.
    For example, suppose I run the training for 100 games and save my model every 10 games.
    Later I will load these 10 models, and for each of them, I will compute how many times the agent would win against an opponent.
    :return: None
    '''

    src = args.save_path
    hidden_units = args.hidden_units
    n_episodes = args.episodes
    opponents = args.opponent.split(',')
    host = args.host
    port = args.port
    difficulties = args.difficulty.split(',')
    model_type = args.type

    if path_exists(src):
        # assert os.path.exists(src), print("The path {} doesn't exists".format(src))

        for d in difficulties:
            if d not in [
                    'beginner', 'intermediate', 'advanced', 'world_class'
            ]:
                parser.error(
                    "--difficulty should be (one or more of) 'beginner','intermediate', 'advanced' ,'world_class'"
                )

        dst = args.dst

        if 'gnubg' in opponents and (not host or not port):
            parser.error(
                "--host and --port are required when 'gnubg' is specified in --opponent"
            )

        for root, dirs, files in os.walk(src):
            global_step = 0
            files = sorted(files)

            writer = SummaryWriter(dst)

            for file in files:
                if ".tar" in file:
                    print("\nLoad {}".format(os.path.join(root, file)))

                    if model_type == 'nn':
                        net = TDGammon(hidden_units=hidden_units,
                                       lr=0.1,
                                       lamda=None,
                                       init_weights=False)
                        env = gym.make('gym_backgammon:backgammon-v0')
                    else:
                        net = TDGammonCNN(lr=0.0001)
                        env = gym.make('gym_backgammon:backgammon-pixel-v0')

                    net.load(checkpoint_path=os.path.join(root, file),
                             optimizer=None,
                             eligibility_traces=False)

                    if 'gnubg' in opponents:
                        tag_scalar_dict = {}

                        gnubg_interface = GnubgInterface(host=host, port=port)

                        for difficulty in difficulties:
                            gnubg_env = GnubgEnv(gnubg_interface,
                                                 difficulty=difficulty,
                                                 model_type=model_type)
                            wins = evaluate_vs_gnubg(agent=TDAgentGNU(
                                WHITE,
                                net=net,
                                gnubg_interface=gnubg_interface),
                                                     env=gnubg_env,
                                                     n_episodes=n_episodes)
                            tag_scalar_dict[difficulty] = wins[WHITE]

                        writer.add_scalars('wins_vs_gnubg/', tag_scalar_dict,
                                           global_step)

                        with open(root + '/results.txt', 'a') as f:
                            print("{};".format(file) + str(tag_scalar_dict),
                                  file=f)

                    if 'random' in opponents:
                        tag_scalar_dict = {}
                        agents = {
                            WHITE: TDAgent(WHITE, net=net),
                            BLACK: RandomAgent(BLACK)
                        }
                        wins = evaluate_agents(agents, env, n_episodes)

                        tag_scalar_dict['random'] = wins[WHITE]

                        writer.add_scalars('wins_vs_random/', tag_scalar_dict,
                                           global_step)

                    global_step += 1

                    writer.close()