Example no. 1
0
    def reset(self):
        if self.__trainable and self.model.training:
            # in training mode the buffer is expected to already be empty at reset time
            assert len(self.sa_buffer) == 0
        else:
            self.sa_buffer = StateActionBuffer(
                max_buffer_size=self.args.max_table_size,
                buffer_add_prob=self.args.sampling_freq,
            )

        self.result = ResultStat("reward", None)
        self.traj_dict = defaultdict(list)
        self.result_dict = {}
Example no. 2
0
    def __init__(
        self,
        coach,
        executor,
        device,
        args,
        writer,
        trainable=False,
        exec_sample=False,
        pg="ppo",
        tag="",
    ):

        self.__coach = coach
        self.__executor = executor
        self.__trainable = trainable
        self.device = device
        self.args = args
        self.tb_log = args.tb_log
        self.save_folder = None
        self.tag = tag
        self.best_test_win_pct = 0
        self.tb_writer = writer
        self.traj_dict = defaultdict(list)
        self.result_dict = {}
        self.result = ResultStat("reward", None)
        self.model = self.load_model(self.__coach, self.__executor, self.args)
        self.exec_sample = exec_sample

        print("Using pg {} algorithm".format(args.pg))
        if self.__trainable:
            # if args.split_train:
            #     self.executor_optimizer = optim.Adam(
            #         self.model.executor.parameters(), lr=args.lr
            #     )
            #     self.coach_optimizer = optim.Adam(
            #         self.model.coach.parameters(), lr=args.lr
            #     )
            # else:
            self.optimizer = optim.Adam(self.model.parameters(), lr=args.lr)
            # trainable agents also keep a state-action buffer, filled by sampling
            self.sa_buffer = StateActionBuffer(
                max_buffer_size=args.max_table_size,
                buffer_add_prob=args.sampling_freq,
            )
            # wandb.watch(self.model)
            self.pg = pg
        else:
            self.optimizer = None
            self.sa_buffer = None
            self.pg = None
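A minimal construction sketch, assuming this __init__ belongs to an agent class (hypothetically named Agent here) and that coach, executor, args, and writer are already loaded; only the keyword arguments visible in the signature above are used.

# Hypothetical usage; the class name Agent and the pre-loaded coach/executor/
# args/writer objects are assumptions, not taken from the snippet.
agent = Agent(
    coach,
    executor,
    device,
    args,
    writer,
    trainable=True,   # creates the Adam optimizer and the StateActionBuffer
    exec_sample=False,
    pg="ppo",
    tag="train",
)
agent.reset()  # reset() from Example no. 1 re-creates the per-game stats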
Example no. 3
0
# Copyright (c) Facebook, Inc. and its affiliates.
Example no. 4
0
    device = torch.device('cuda:%d' % args.gpu)

    model1 = load_model(args.coach1, args.executor1, args)
    model2 = load_model(args.coach2, args.executor2, args)

    game_option = get_game_option(args)
    ai1_option, ai2_option = get_ai_options(
        args, [model1.coach.num_instructions, model2.coach.num_instructions])

    context, act1_dc, act2_dc = create_game(args.num_thread, ai1_option,
                                            ai2_option, game_option)
    context.start()
    dc = DataChannelManager([act1_dc, act2_dc])

    result1 = ResultStat('reward', None)
    result2 = ResultStat('reward', None)
    i = 0
    while not context.terminated():
        i += 1
        if i % 1000 == 0:
            print('%d, progress agent1: win %d, loss %d' %
                  (i, result1.win, result1.loss))

        data = dc.get_input(max_timeout_s=1)
        if len(data) == 0:
            continue
        for key in data:
            # print(key)
            batch = to_device(data[key], device)
            if key == 'act1':
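                # The original snippet is cut off at this branch; the completion
                # below is a hypothetical sketch modeled on the analogous loops in
                # Example no. 5 and Example no. 7, not code recovered from this file.
                result1.feed(batch)
                with torch.no_grad():
                    reply, _ = model1.forward(batch)
            elif key == 'act2':
                result2.feed(batch)
                with torch.no_grad():
                    reply, _ = model2.forward(batch)
            else:
                assert False
            dc.set_reply(key, reply)

    print(result1.log(0))
    print(result2.log(0))
    dc.terminate()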
Example no. 5
0
    device = torch.device('cuda:%d' % args.gpu)
    coach = ConvRnnCoach.load(args.coach_path).to(device)
    coach.max_raw_chars = args.max_raw_chars
    executor = Executor.load(args.model_path).to(device)
    executor_wrapper = ExecutorWrapper(coach, executor, coach.num_instructions,
                                       args.max_raw_chars, args.cheat,
                                       args.inst_mode)
    executor_wrapper.train(False)

    game_option = get_game_option(args)
    ai1_option, ai2_option = get_ai_options(args, coach.num_instructions)

    context, act_dc = create_game(args.num_thread, ai1_option, ai2_option,
                                  game_option)
    context.start()
    dc = DataChannelManager([act_dc])

    result_stat = ResultStat('reward', None)
    while not context.terminated():
        data = dc.get_input(max_timeout_s=1)
        if len(data) == 0:
            continue
        data = to_device(data['act'], device)
        result_stat.feed(data)
        reply = executor_wrapper.forward(data)

        dc.set_reply('act', reply)

    print(result_stat.log(0))
    dc.terminate()
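The snippet reads several fields from args; a minimal argparse sketch covering only those fields is shown below. The flag names mirror the attributes used above, but the types and defaults are assumptions, and get_game_option / get_ai_options read additional fields that are not listed here.

# Hypothetical argument parser; types and defaults are guesses rather than
# values from the original script.
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', type=int, default=0)
parser.add_argument('--coach_path', type=str, required=True)
parser.add_argument('--model_path', type=str, required=True)
parser.add_argument('--max_raw_chars', type=int, default=200)
parser.add_argument('--cheat', type=int, default=0)
parser.add_argument('--inst_mode', type=str, default='full')
parser.add_argument('--num_thread', type=int, default=1)
args = parser.parse_args()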
Example no. 6
0
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
import argparse
import os
import sys
import pprint
from set_path import append_sys_path
append_sys_path()
import torch
import tube
from pytube import DataChannelManager
import minirts
import numpy as np
import random
import pickle
from collections import defaultdict
from rnn_coach import ConvRnnCoach
from onehot_coach import ConvOneHotCoach
from rnn_generator import RnnGenerator
from itertools import groupby
from executor_wrapper import ExecutorWrapper
from executor import Executor
from common_utils import to_device, ResultStat, Logger
from best_models import best_executors, best_coaches
from tqdm import tqdm
p1dict = defaultdict(list)
Example no. 7
0
def run_eval(args, model1, model2, device, num_games=100):

    num_eval_games = num_games

    result1 = ResultStat("reward", None)
    result2 = ResultStat("reward", None)

    game_option = get_game_option(args)
    ai1_option, ai2_option = get_ai_options(
        args, [model1.coach.num_instructions, model2.coach.num_instructions])

    if args.opponent == "sp":
        context, act1_dc, act2_dc = init_mt_games(num_eval_games, 0, args,
                                                  ai1_option, ai2_option,
                                                  game_option)
        pbar = tqdm(total=num_eval_games * 2)
    else:
        context, act1_dc, act2_dc = init_mt_games(0, num_eval_games, args,
                                                  ai1_option, ai2_option,
                                                  game_option)
        pbar = tqdm(total=num_eval_games)
    # context, act1_dc, act2_dc = init_games(
    #     num_eval_games, ai1_option, ai2_option, game_option)
    context.start()
    dc = DataChannelManager([act1_dc, act2_dc])

    i = 0
    model1.eval()
    model2.eval()

    while not context.terminated():
        i += 1
        # if i % 1000 == 0:
        #     print('%d, progress agent1: win %d, loss %d' % (i, result1.win, result1.loss))

        data = dc.get_input(max_timeout_s=1)
        if len(data) == 0:
            continue
        for key in data:
            # print(key)
            batch = to_device(data[key], device)
            if key == "act1":
                batch["actor"] = "act1"
                # Add batches to the state table, sampling before adding,
                # keyed on the game_id

                result1.feed(batch)
                with torch.no_grad():
                    reply, _ = model1.forward(batch)  # , exec_sample=True)

            elif key == "act2":
                batch["actor"] = "act2"
                result2.feed(batch)

                with torch.no_grad():
                    reply, _ = model2.forward(batch)

            else:
                assert False

            dc.set_reply(key, reply)

            game_ids = batch["game_id"].cpu().numpy()
            terminals = batch["terminal"].cpu().numpy().flatten()

            for idx, g_id in enumerate(game_ids):  # idx avoids shadowing the outer counter i
                if terminals[idx] == 1:
                    pbar.update(1)

    model1.eval()
    model2.eval()
    pbar.close()

    return result1, result2
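A hedged driver sketch for run_eval: the model loading and args below follow Example no. 4 and are assumptions here; only run_eval itself and the ResultStat interface (log, win, loss) come from the snippets above.

# Hypothetical driver; everything except run_eval and ResultStat's
# log/win/loss interface is assumed rather than taken from this file.
device = torch.device('cuda:%d' % args.gpu)
model1 = load_model(args.coach1, args.executor1, args)  # as in Example no. 4
model2 = load_model(args.coach2, args.executor2, args)
result1, result2 = run_eval(args, model1, model2, device, num_games=100)
print(result1.log(0))
print(result2.log(0))
print('agent1: win %d, loss %d' % (result1.win, result1.loss))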
Example no. 8
0
# Copyright (c) Facebook, Inc. and its affiliates.
Example no. 9
0
# Copyright (c) Facebook, Inc. and its affiliates.