# Copyright (c) Facebook, Inc. and its affiliates.
# Evaluate a single coach+executor agent: load both models, run games across
# worker threads, and print aggregate win/loss statistics at the end.
device = torch.device('cuda:%d' % args.gpu)

# Load the instruction coach and cap how many raw characters it may emit.
coach = ConvRnnCoach.load(args.coach_path).to(device)
coach.max_raw_chars = args.max_raw_chars

# Load the low-level executor and combine coach + executor into one acting model.
executor = Executor.load(args.model_path).to(device)
executor_wrapper = ExecutorWrapper(
    coach, executor, coach.num_instructions, args.max_raw_chars,
    args.cheat, args.inst_mode)
executor_wrapper.train(False)  # inference only

game_option = get_game_option(args)
ai1_option, ai2_option = get_ai_options(args, coach.num_instructions)

# Launch the game threads; 'act' is the only data channel this loop serves.
context, act_dc = create_game(args.num_thread, ai1_option, ai2_option, game_option)
context.start()
dc = DataChannelManager([act_dc])

result_stat = ResultStat('reward', None)
while not context.terminated():
    data = dc.get_input(max_timeout_s=1)
    if len(data) == 0:
        continue
    data = to_device(data['act'], device)
    result_stat.feed(data)
    # FIX: run inference under no_grad so autograd does not record a graph
    # for every step (the two-model match loop in this file already does this).
    with torch.no_grad():
        reply = executor_wrapper.forward(data)
    dc.set_reply('act', reply)

print(result_stat.log(0))
dc.terminate()
# Drive a head-to-head match between model1 ('act1' channel) and model2
# ('act2' channel), tallying each side's wins/losses and printing agent1's
# progress every 1000 iterations.
result1 = ResultStat('reward', None)
result2 = ResultStat('reward', None)

step = 0
while not context.terminated():
    step += 1
    if step % 1000 == 0:
        print('%d, progress agent1: win %d, loss %d'
              % (step, result1.win, result1.loss))

    data = dc.get_input(max_timeout_s=1)
    if not data:
        continue

    for key in data:
        batch = to_device(data[key], device)
        # Route the batch to the stat tracker / model owning this channel.
        if key == 'act1':
            stat, model = result1, model1
        elif key == 'act2':
            stat, model = result2, model2
        else:
            assert False
        stat.feed(batch)
        with torch.no_grad():
            reply = model.forward(batch)
        dc.set_reply(key, reply)

print(result1.log(0))
print(result2.log(0))
dc.terminate()
# Copyright (c) Facebook, Inc. and its affiliates. # All rights reserved. # # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. # import argparse import os import sys import pprint from set_path import append_sys_path append_sys_path() import torch import tube from pytube import DataChannelManager import minirts import numpy as np import random import pickle from collections import defaultdict from rnn_coach import ConvRnnCoach from onehot_coach import ConvOneHotCoach from rnn_generator import RnnGenerator from itertools import groupby from executor_wrapper import ExecutorWrapper from executor import Executor from common_utils import to_device, ResultStat, Logger from best_models import best_executors, best_coaches from tqdm import tqdm p1dict = defaultdict(list)