Ejemplo n.º 1
0
# Copyright (c) Facebook, Inc. and its affiliates.
Ejemplo n.º 2
0
    # Select the CUDA device whose index is given by args.gpu.
    device = torch.device('cuda:%d' % args.gpu)
    # Load the coach from args.coach_path and move it to the device, then
    # override its max_raw_chars attribute with the command-line value.
    coach = ConvRnnCoach.load(args.coach_path).to(device)
    coach.max_raw_chars = args.max_raw_chars
    # Load the executor from args.model_path and move it to the same device.
    executor = Executor.load(args.model_path).to(device)
    # Combine coach and executor into one object; its forward() is what
    # produces the replies sent back to the game below.
    executor_wrapper = ExecutorWrapper(coach, executor, coach.num_instructions,
                                       args.max_raw_chars, args.cheat,
                                       args.inst_mode)
    # NOTE(review): presumably switches the wrapper to evaluation mode
    # (torch.nn.Module.train convention) -- confirm ExecutorWrapper is a Module.
    executor_wrapper.train(False)

    # Build the game and AI option objects from the parsed arguments.
    game_option = get_game_option(args)
    ai1_option, ai2_option = get_ai_options(args, coach.num_instructions)

    # Create the game context and its 'act' data channel, start the context,
    # and hand the channel to a manager that is polled below.
    context, act_dc = create_game(args.num_thread, ai1_option, ai2_option,
                                  game_option)
    context.start()
    dc = DataChannelManager([act_dc])

    # Accumulates statistics from every batch fed to it in the loop.
    result_stat = ResultStat('reward', None)
    while not context.terminated():
        # Poll with a 1 second timeout so the loop can re-check
        # context.terminated() even when no data arrives.
        data = dc.get_input(max_timeout_s=1)
        if len(data) == 0:
            continue
        # Only the 'act' entry is consumed; move it to the device first.
        data = to_device(data['act'], device)
        result_stat.feed(data)
        # NOTE(review): unlike similar loops, this forward() is not wrapped in
        # torch.no_grad() -- confirm the wrapper disables gradients itself.
        reply = executor_wrapper.forward(data)

        dc.set_reply('act', reply)

    # Print the accumulated statistics, then shut the channel manager down.
    # NOTE(review): the meaning of the 0 argument to log() is not visible here.
    print(result_stat.log(0))
    dc.terminate()
Ejemplo n.º 3
0
    # One statistics accumulator per agent; each is fed every batch that
    # arrives on that agent's data channel.
    result1 = ResultStat('reward', None)
    result2 = ResultStat('reward', None)
    # Dispatch table: data-channel key -> (statistics accumulator, model).
    agents = {
        'act1': (result1, model1),
        'act2': (result2, model2),
    }
    i = 0
    while not context.terminated():
        i += 1
        # Periodic progress line for agent 1 (every 1000 loop iterations,
        # counting iterations that time out without data).
        if i % 1000 == 0:
            print('%d, progress agent1: win %d, loss %d' %
                  (i, result1.win, result1.loss))

        # Poll with a 1 second timeout so the loop can re-check
        # context.terminated() even when no data arrives.
        data = dc.get_input(max_timeout_s=1)
        if len(data) == 0:
            continue
        for key in data:
            batch = to_device(data[key], device)
            if key not in agents:
                # Raised explicitly instead of `assert False`: a bare assert
                # is stripped under `python -O`, which would let execution
                # fall through and send a stale or undefined reply.
                raise AssertionError(
                    'unexpected data channel key: %r' % (key,))
            stat, model = agents[key]
            stat.feed(batch)
            # Inference only: no gradients are needed for the forward pass.
            with torch.no_grad():
                reply = model.forward(batch)
            dc.set_reply(key, reply)

    # Print both agents' accumulated statistics, then shut the manager down.
    # NOTE(review): the meaning of the 0 argument to log() is not visible here.
    print(result1.log(0))
    print(result2.log(0))
    dc.terminate()
Ejemplo n.º 4
0
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
import argparse
import os
import sys
import pprint
from set_path import append_sys_path
append_sys_path()
import torch
import tube
from pytube import DataChannelManager
import minirts
import numpy as np
import random
import pickle
from collections import defaultdict
from rnn_coach import ConvRnnCoach
from onehot_coach import ConvOneHotCoach
from rnn_generator import RnnGenerator
from itertools import groupby
from executor_wrapper import ExecutorWrapper
from executor import Executor
from common_utils import to_device, ResultStat, Logger
from best_models import best_executors, best_coaches
from tqdm import tqdm
# Module-level mapping whose missing keys default to a fresh empty list.
# NOTE(review): what is collected here is not visible in this chunk --
# presumably per-key records for player 1; confirm against the code using it.
p1dict = defaultdict(list)