def start(self):
    """Start the underlying game context and wire up the data channels.

    Side effects:
        Starts ``self.context`` and creates ``self.dc``, a
        DataChannelManager over the two actor channels.

    Returns:
        (agent1, agent2): the two agent objects held by this instance.

    Raises:
        RuntimeError: if no game context has been created yet.
        (Was a bare ``assert`` — asserts are stripped under ``python -O``,
        so the check is now an explicit raise.)
    """
    if self.context is None:
        raise RuntimeError('start() called before a game context was created')
    self.context.start()
    self.dc = DataChannelManager([self.act1_dc, self.act2_dc])
    return self.agent1, self.agent2
# Copyright (c) Facebook, Inc. and its affiliates.
# NOTE(review): this script body was collapsed onto a single line by an
# extraction step AND is truncated mid-loop — the body of `for key in data:`
# is missing — so it is left byte-identical rather than reformatted.
# What the visible statements do: redirect stdout to <save_dir>/train.log,
# load two coach/executor model pairs, build game/AI options, launch
# `args.num_thread` games, then poll the DataChannelManager for batches,
# printing agent1's win/loss every 1000 polls.
logger_path = os.path.join(args.save_dir, 'train.log') sys.stdout = Logger(logger_path) device = torch.device('cuda:%d' % args.gpu) model1 = load_model(args.coach1, args.executor1, args) model2 = load_model(args.coach2, args.executor2, args) game_option = get_game_option(args) ai1_option, ai2_option = get_ai_options( args, [model1.coach.num_instructions, model2.coach.num_instructions]) context, act1_dc, act2_dc = create_game(args.num_thread, ai1_option, ai2_option, game_option) context.start() dc = DataChannelManager([act1_dc, act2_dc]) result1 = ResultStat('reward', None) result2 = ResultStat('reward', None) i = 0 while not context.terminated(): i += 1 if i % 1000 == 0: print('%d, progress agent1: win %d, loss %d' % (i, result1.win, result1.loss)) data = dc.get_input(max_timeout_s=1) if len(data) == 0: continue for key in data: # print(key)
# Single-model evaluation driver: load one coach + executor pair, run
# `args.num_thread` games against the built-in AI, and report win/loss stats.

# Place the models on the requested GPU.
device = torch.device('cuda:%d' % args.gpu)

# Load the frozen coach/executor pair and wrap them for joint inference.
coach = ConvRnnCoach.load(args.coach_path).to(device)
coach.max_raw_chars = args.max_raw_chars

executor = Executor.load(args.model_path).to(device)
executor_wrapper = ExecutorWrapper(
    coach,
    executor,
    coach.num_instructions,
    args.max_raw_chars,
    args.cheat,
    args.inst_mode)
executor_wrapper.train(False)

# Build the game and AI option structs, then launch the games.
game_option = get_game_option(args)
ai1_option, ai2_option = get_ai_options(args, coach.num_instructions)
context, act_dc = create_game(
    args.num_thread, ai1_option, ai2_option, game_option)
context.start()
dc = DataChannelManager([act_dc])

result_stat = ResultStat('reward', None)

# Poll the channel until every game has finished; each non-empty batch is
# fed through the model and the reply is pushed back to the game engine.
while not context.terminated():
    data = dc.get_input(max_timeout_s=1)
    if len(data) == 0:
        continue
    data = to_device(data['act'], device)
    result_stat.feed(data)
    reply = executor_wrapper.forward(data)
    dc.set_reply('act', reply)

print(result_stat.log(0))
dc.terminate()
# NOTE(review): file header (license text, imports, and module-level state)
# collapsed onto a single line by an extraction step; left byte-identical.
# `p1dict` is a module-level defaultdict(list) — presumably accumulates
# per-player data; confirm against the code that fills it.
# Copyright (c) Facebook, Inc. and its affiliates. # All rights reserved. # # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. # import argparse import os import sys import pprint from set_path import append_sys_path append_sys_path() import torch import tube from pytube import DataChannelManager import minirts import numpy as np import random import pickle from collections import defaultdict from rnn_coach import ConvRnnCoach from onehot_coach import ConvOneHotCoach from rnn_generator import RnnGenerator from itertools import groupby from executor_wrapper import ExecutorWrapper from executor import Executor from common_utils import to_device, ResultStat, Logger from best_models import best_executors, best_coaches from tqdm import tqdm p1dict = defaultdict(list)
def run_eval(args, model1, model2, device, num_games=100):
    """Play evaluation games between two agents and collect win/loss stats.

    Args:
        args: parsed CLI namespace; must provide ``opponent`` plus whatever
            ``get_game_option`` / ``get_ai_options`` / ``init_mt_games`` read.
        model1, model2: agents exposing ``.eval()``, ``.forward(batch)`` and
            ``.coach.num_instructions``.
        device: torch device each incoming batch is moved to.
        num_games: number of evaluation games (per matchup).

    Returns:
        (result1, result2): ResultStat accumulators for agent 1 and agent 2.
    """
    num_eval_games = num_games
    result1 = ResultStat("reward", None)
    result2 = ResultStat("reward", None)

    game_option = get_game_option(args)
    ai1_option, ai2_option = get_ai_options(
        args, [model1.coach.num_instructions, model2.coach.num_instructions])

    # Self-play ("sp") runs both seats, so twice as many episodes terminate.
    if args.opponent == "sp":
        context, act1_dc, act2_dc = init_mt_games(
            num_eval_games, 0, args, ai1_option, ai2_option, game_option)
        pbar = tqdm(total=num_eval_games * 2)
    else:
        context, act1_dc, act2_dc = init_mt_games(
            0, num_eval_games, args, ai1_option, ai2_option, game_option)
        pbar = tqdm(total=num_eval_games)

    context.start()
    dc = DataChannelManager([act1_dc, act2_dc])

    # Put both agents in inference mode once, before the loop.
    # BUGFIX: the original also re-called .eval() on every poll iteration;
    # the calls are idempotent so dropping the duplicates is safe.
    model1.eval()
    model2.eval()

    poll_count = 0
    while not context.terminated():
        poll_count += 1
        data = dc.get_input(max_timeout_s=1)
        if len(data) == 0:
            continue
        for key in data:
            batch = to_device(data[key], device)
            if key == "act1":
                batch["actor"] = "act1"
                result1.feed(batch)
                with torch.no_grad():
                    reply, _ = model1.forward(batch)
            elif key == "act2":
                batch["actor"] = "act2"
                result2.feed(batch)
                with torch.no_grad():
                    reply, _ = model2.forward(batch)
            else:
                # Unreachable for a well-formed channel set; raise instead of
                # `assert False` so the guard survives `python -O`.
                raise RuntimeError("unexpected data channel key: %s" % key)
            dc.set_reply(key, reply)

            # Advance the progress bar once per finished game in this batch.
            # BUGFIX: the original reused `i` for both the outer poll counter
            # and this inner enumerate index, clobbering the counter.
            game_ids = batch["game_id"].cpu().numpy()
            terminals = batch["terminal"].cpu().numpy().flatten()
            for idx, _game_id in enumerate(game_ids):
                if terminals[idx] == 1:
                    pbar.update(1)

    pbar.close()
    return result1, result2