Example 1
    def test_episodes(self):
        # Test the model on all episodes.
        # Success is determined by the final step's reward reaching MIN_FINAL_REWARD_FOR_SUCCESS,
        # which works for BabyAI and Sokoban, but is not appropriate for many environments.
        NUM_EPISODES_TO_TEST = spec.val("NUM_EPISODES_TO_TEST")
        MIN_FINAL_REWARD_FOR_SUCCESS = spec.val("MIN_FINAL_REWARD_FOR_SUCCESS")
        self.create_results_output_file()
        spec.output_to_file(self.output_filename)
        num_wins = 0
        num_episodes_tested = 0
        self.output("Testing on {} episodes.".format(NUM_EPISODES_TO_TEST))
        start_time = time.time()
        for episode_id in range(NUM_EPISODES_TO_TEST):
            torch.manual_seed(AGENT_RANDOM_SEED)
            final_reward, steps = self.test_on_episode(episode_id)
            if final_reward >= MIN_FINAL_REWARD_FOR_SUCCESS:
                num_wins += 1
            num_episodes_tested += 1
            if (num_episodes_tested % (NUM_EPISODES_TO_TEST / 10) == 0):
                self.output('{:4d} / {:5d}  =  {:5.1f}%'.format(
                    num_wins, num_episodes_tested,
                    100.0 * num_wins / num_episodes_tested))
        self.output("Time: {:3.1f} min".format(
            (time.time() - start_time) / 60.))
        self.output("Success rate = {} / {} episodes = {:5.1f}%".format(
            num_wins, num_episodes_tested,
            100.0 * num_wins / num_episodes_tested))
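Every excerpt in this section reads its hyperparameters through spec.val(key) from utils.spec_reader. That module is not shown here; the following hypothetical stand-in (for running an excerpt in isolation, with made-up values) is enough to satisfy the call pattern used above:

# Hypothetical stand-in for utils.spec_reader.spec, only for running excerpts standalone.
# The real spec object is loaded from a run-spec file in the repository (not shown here).
class _SpecStub(object):
    def __init__(self, values):
        self._values = values               # dict mapping spec keys to values

    def val(self, key):
        return self._values[key]           # mirrors the spec.val(key) calls above

    def output_to_file(self, filename):
        pass                               # the real spec writes its settings to the results file

spec = _SpecStub({"NUM_EPISODES_TO_TEST": 100, "MIN_FINAL_REWARD_FOR_SUCCESS": 1.0})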
Example 2
    def create_environment(self, seed=None):
        # Each new environment should be listed here.
        if ENV == "Pathfinding_Env":
            from environments.pathfinding import Pathfinding_Env
            environment = Pathfinding_Env(seed)
        elif ENV == "BabyAI_Env":
            from environments.babyai import BabyAI_Env
            environment = BabyAI_Env(seed)
            HELDOUT_TESTING = spec.val("HELDOUT_TESTING")
            self.heldout_testing = HELDOUT_TESTING
        elif ENV == "Sokoban_Env":
            from environments.sokoban import Sokoban_Env
            environment = Sokoban_Env(seed)
        else:
            print("Environment {} not found.".format(ENV))
            exit(0)
        self.observation_space = environment.observation_space
        self.action_space = environment.action_space
        return environment
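The factory only assumes that each environment class takes a seed and exposes observation_space and action_space attributes. A minimal hypothetical environment conforming to that contract (not one of the repo's environments; the attribute types and the reset/step convention are assumptions, since they are not shown in this excerpt) might look like this:

# Hypothetical minimal environment satisfying what create_environment assumes:
# a constructor taking a seed plus observation_space and action_space attributes.
import random

class Toy_Env(object):
    def __init__(self, seed=None):
        self.rand = random.Random(seed)
        self.observation_space = 4    # assumed: length of the observation vector
        self.action_space = 2         # assumed: number of discrete actions

    def reset(self):
        return [self.rand.random() for _ in range(self.observation_space)]

    def step(self, action):
        obs = [self.rand.random() for _ in range(self.observation_space)]
        return obs, 0.0, False        # assumed convention: (observation, reward, done)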
Example 3
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
import collections
import numpy as np
import copy

from utils.spec_reader import spec

hidden_size = spec.val("DNC_HIDDEN_SIZE")
memory_size = spec.val("DNC_MEMORY_SIZE")
word_size = spec.val("DNC_WORD_SIZE")
num_write_heads = spec.val("DNC_NUM_WRITE_HEADS")
num_read_heads = spec.val("DNC_READ_HEADS")
num_read_modes = 1 + 2 * num_write_heads
batch_size = 1

TemporalLinkageState = collections.namedtuple('TemporalLinkageState',
                                              ('link', 'precedence_weights'))

AccessState = collections.namedtuple(
    'AccessState',
    ('memory', 'read_weights', 'write_weights', 'linkage', 'usage'))

_EPSILON = 0.001


class MemoryModule(nn.Module):
    def __init__(self):
        super(MemoryModule, self).__init__()
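The two namedtuples define the recurrent state carried by the DNC memory module. Under the usual DNC shape conventions (an assumption here, since the rest of MemoryModule is not shown in this excerpt), an all-zero initial state could be built like this:

# Sketch of an all-zero initial AccessState, assuming the standard DNC shape conventions;
# the module's own state-initialization code is not shown in this excerpt.
def initial_access_state():
    return AccessState(
        memory=torch.zeros(batch_size, memory_size, word_size),
        read_weights=torch.zeros(batch_size, num_read_heads, memory_size),
        write_weights=torch.zeros(batch_size, num_write_heads, memory_size),
        linkage=TemporalLinkageState(
            link=torch.zeros(batch_size, num_write_heads, memory_size, memory_size),
            precedence_weights=torch.zeros(batch_size, num_write_heads, memory_size)),
        usage=torch.zeros(batch_size, memory_size))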
Example 4
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import torch
import torch.nn as nn
import torch.nn.functional as F
from agents.networks.shared.general import LinearLayer, SeparateActorCriticLayers

from utils.spec_reader import spec
NUM_RNN_UNITS = spec.val("NUM_RNN_UNITS")
AC_HIDDEN_LAYER_SIZE = spec.val("AC_HIDDEN_LAYER_SIZE")
OBS_EMBED_SIZE = spec.val("OBS_EMBED_SIZE")


class GRU_Network(nn.Module):
    def __init__(self, input_size, action_space_size):
        super(GRU_Network, self).__init__()
        next_input_size = input_size
        if OBS_EMBED_SIZE > 0:
            self.obs_emb = LinearLayer(next_input_size, OBS_EMBED_SIZE)
            next_input_size = OBS_EMBED_SIZE
        self.num_rnn_units = NUM_RNN_UNITS
        self.rnn = nn.GRUCell(next_input_size, self.num_rnn_units)
        self.actor_critic_layers = SeparateActorCriticLayers(
            self.num_rnn_units, 2, AC_HIDDEN_LAYER_SIZE, action_space_size)

    def forward(self, obs, old_state):
        tens = torch.FloatTensor(obs).unsqueeze(0)
        if OBS_EMBED_SIZE > 0:
            tens = self.obs_emb(tens)
        new_state = self.rnn(tens, old_state)
        policy, value_est = self.actor_critic_layers(new_state)
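The excerpt is cut off before forward returns. Assuming it ends by returning the policy, value estimate, and new recurrent state (an assumption, not shown above), a single step through the network would look roughly like this:

# Rough usage sketch; assumes forward(...) ends with
#     return policy, value_est, new_state
# which is not shown in the truncated excerpt above.
net = GRU_Network(input_size=10, action_space_size=4)
state = torch.zeros(1, NUM_RNN_UNITS)   # initial recurrent state for a batch of one
obs = [0.0] * 10                        # placeholder observation vector of length input_size
policy, value_est, state = net(obs, state)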
Example 5
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import numpy as np
import random

from utils.spec_reader import spec
NUM_PATTERNS = spec.val("NUM_PATTERNS")

PATTERN_LENGTH = 7


class Link(object):
    def __init__(self, graph, src, tar):
        self.graph = graph
        self.src = src
        self.tar = tar
        assert (graph.path_len[src][tar] == 0)
        graph.path_len[src][tar] = 1

    def output(self):
        print("Link  {} -> {}".format(self.src, self.tar))


class Graph(object):
    def __init__(self, rand):
        self.rand = rand

    def reset(self):
        # Links are represented by objects, but nodes are represented by indices.
        # Keep in mind that the nodes in this graph do not (necessarily) correspond to transformer nodes.
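Each Link registers itself in the graph's path_len matrix, asserting that no direct path between the two nodes exists yet. The allocation of path_len happens later in Graph.reset() and is not shown here; a tiny hypothetical stand-in is enough to exercise the invariant:

# Hypothetical stand-in: the real Graph builds path_len in reset() (not shown above);
# a bare matrix is enough to illustrate the Link invariant.
class _TinyGraph(object):
    def __init__(self, num_nodes):
        self.path_len = np.zeros((num_nodes, num_nodes), dtype=int)

g = _TinyGraph(3)
Link(g, 0, 1).output()   # Link  0 -> 1
Link(g, 1, 2).output()   # Link  1 -> 2
assert g.path_len[0][1] == 1 and g.path_len[1][2] == 1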
Example 6
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import torch
import torch.nn as nn
import numpy as np

from agents.networks.shared.transformer import Transformer
from agents.networks.shared.general import LinearLayer
from agents.networks.shared.general import SeparateActorCriticLayers
from agents.networks.shared.general import SharedActorCriticLayers
from utils.graph import Graph

from utils.spec_reader import spec
V2 = spec.val("V2")
WMG_ATTENTION_HEAD_SIZE = spec.val("WMG_ATTENTION_HEAD_SIZE")
WMG_NUM_ATTENTION_HEADS = spec.val("WMG_NUM_ATTENTION_HEADS")
WMG_NUM_LAYERS = spec.val("WMG_NUM_LAYERS")
WMG_HIDDEN_SIZE = spec.val("WMG_HIDDEN_SIZE")
AC_HIDDEN_LAYER_SIZE = spec.val("AC_HIDDEN_LAYER_SIZE")
WMG_MAX_OBS = spec.val("WMG_MAX_OBS")
WMG_MAX_MEMOS = spec.val("WMG_MAX_MEMOS")

# Set WMG_MAX_MEMOS > 0 for attention over Memos, stored in a StateMatrix.
# Set WMG_MAX_OBS > 0 for attention over past observations, stored in a StateMatrix.
# Attention over both would require two separate instances of StateMatrix.
# The WMG experiments did not explore this combination, so there's only one StateMatrix.
if WMG_MAX_MEMOS:
    # StateMatrix contains Memos.
    S = WMG_MAX_MEMOS  # Maximum number of state vectors stored in the matrix.
    WMG_MEMO_SIZE = spec.val("WMG_MEMO_SIZE")
    assert WMG_MAX_OBS == 0
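The comment above refers to a StateMatrix holding either Memos or past observations. That class is defined elsewhere in the repo and is not shown here; as a rough mental model (a hypothetical sketch, not the repo's implementation), it behaves like a fixed-capacity FIFO of S state vectors that the WMG transformer attends over:

# Hypothetical sketch of a fixed-capacity FIFO of state vectors, as a mental model for
# the StateMatrix mentioned above; the repo's actual StateMatrix class is not shown here.
class _FifoStateMatrix(object):
    def __init__(self, max_vectors, vector_size):
        self.vectors = [torch.zeros(vector_size) for _ in range(max_vectors)]

    def add_vector(self, vector):
        self.vectors.pop(0)            # drop the oldest vector
        self.vectors.append(vector)    # append the newest

    def get_vectors(self):
        return self.vectors            # the set of vectors the transformer attends over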
Example 7
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from utils.spec_reader import spec
AGENT_RANDOM_SEED = spec.val("AGENT_RANDOM_SEED")
A3C_T_MAX = spec.val("A3C_T_MAX")
LEARNING_RATE = spec.val("LEARNING_RATE")
DISCOUNT_FACTOR = spec.val("DISCOUNT_FACTOR")
GRADIENT_CLIP = spec.val("GRADIENT_CLIP")
WEIGHT_DECAY = spec.val("WEIGHT_DECAY")
AGENT_NET = spec.val("AGENT_NET")
ENTROPY_TERM_STRENGTH = spec.val("ENTROPY_TERM_STRENGTH")
REWARD_SCALE = spec.val("REWARD_SCALE")
ADAM_EPS = spec.val("ADAM_EPS")
ANNEAL_LR = spec.val("ANNEAL_LR")
if ANNEAL_LR:
    LR_GAMMA = spec.val("LR_GAMMA")
    from torch.optim.lr_scheduler import StepLR

torch.manual_seed(AGENT_RANDOM_SEED)


class A3cAgent(object):
    ''' A single-worker version of Asynchronous Advantage Actor-Critic (Mnih et al., 2016)'''
    def __init__(self, observation_space_size, action_space_size):
        if AGENT_NET == "GRU_Network":
            from agents.networks.gru import GRU_Network
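The update step of A3cAgent is not shown in this excerpt. In the standard A3C recipe, every A3C_T_MAX steps the agent computes discounted n-step returns and advantages for the segment and applies the actor-critic loss. A generic illustration of that return computation (illustrative only, not the repo's exact code) is:

# Generic n-step discounted return / advantage computation as used by A3C; illustrative
# only, not copied from A3cAgent.
def discounted_returns_and_advantages(rewards, value_ests, bootstrap_value):
    # rewards and value_ests cover one segment of up to A3C_T_MAX steps;
    # bootstrap_value is the critic's estimate of the state following the segment.
    returns, advantages = [], []
    ret = bootstrap_value
    for reward, value_est in zip(reversed(rewards), reversed(value_ests)):
        ret = reward * REWARD_SCALE + DISCOUNT_FACTOR * ret
        returns.insert(0, ret)
        advantages.insert(0, ret - value_est)
    return returns, advantages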
Example 8
# Adapted from https://github.com/mpSchrader/gym-sokoban
# Max-Philipp B. Schrader, 2018.

import os
from os import listdir
from os import path
from os.path import isfile, join
import random
import numpy as np
from utils.graph import Graph
from utils.graph import Entity
import zipfile

from utils.spec_reader import spec
SOKOBAN_MAX_STEPS = spec.val("SOKOBAN_MAX_STEPS")
SOKOBAN_DIFFICULTY = spec.val("SOKOBAN_DIFFICULTY")
SOKOBAN_SPLIT = spec.val("SOKOBAN_SPLIT")
SOKOBAN_ROOM_OVERRIDE = spec.val("SOKOBAN_ROOM_OVERRIDE")
SOKOBAN_BOXES_REQUIRED = spec.val("SOKOBAN_BOXES_REQUIRED")
SOKOBAN_OBSERVATION_FORMAT = spec.val("SOKOBAN_OBSERVATION_FORMAT")
SOKOBAN_REWARD_PER_STEP = spec.val("SOKOBAN_REWARD_PER_STEP")
SOKOBAN_REWARD_SUCCESS = spec.val("SOKOBAN_REWARD_SUCCESS")

# Dimensions of the original Sokoban images: PIXELS_PER_TILE pixels per tile,
# TILES_PER_CELL tiles per cell, and PUZZLE_SIZE x PUZZLE_SIZE cells per puzzle.
PIXELS_PER_TILE = 6
TILES_PER_CELL = 8
PUZZLE_SCALE = PIXELS_PER_TILE * TILES_PER_CELL
PUZZLE_SIZE = 10
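# Derived sizes (an assumption based only on the constant names above, not on the
# rendering code, which is not shown here): PUZZLE_SCALE = 6 * 8 = 48 pixels per cell,
# so a full puzzle image would be PUZZLE_SIZE * PUZZLE_SCALE = 10 * 48 = 480 pixels per side.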

# Cell state codes
WALL = 0
Example 9
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import gym
import babyai  # This registers the 19 MiniGrid levels.
import numpy as np

from utils.spec_reader import spec
BABYAI_ENV_LEVEL = spec.val("BABYAI_ENV_LEVEL")
USE_SUCCESS_RATE = spec.val("USE_SUCCESS_RATE")  # Used by post-processing files.
SUCCESS_RATE_THRESHOLD = spec.val("SUCCESS_RATE_THRESHOLD")
HELDOUT_TESTING = spec.val("HELDOUT_TESTING")
NUM_TEST_EPISODES = spec.val("NUM_TEST_EPISODES")
BINARY_REWARD = spec.val("BINARY_REWARD")
OBS_ENCODER = spec.val("OBS_ENCODER")

assert USE_SUCCESS_RATE

color_list = ['red', 'green', 'blue', 'purple', 'yellow', 'grey']
color_index_dict = {}
for i in range(len(color_list)):
    color_index_dict[color_list[i]] = i

cell_object_base_index = 4

action_list = ['go', 'pick', 'put', 'open']
action_index_dict = {}
for i in range(len(action_list)):
    action_index_dict[action_list[i]] = i

objtype_list = ['door', 'key', 'ball', 'box']
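These lookup tables map mission words to small integer codes; the full observation encoding happens later in BabyAI_Env and is not shown. A quick hypothetical check of the tables built so far (the excerpt is cut off before the matching objtype_index_dict is built, so it is reconstructed here in the same pattern):

# Hypothetical illustration of the lookup tables defined above.
objtype_index_dict = {}
for i in range(len(objtype_list)):
    objtype_index_dict[objtype_list[i]] = i

print(color_index_dict['red'])      # 0
print(action_index_dict['open'])    # 3
print(objtype_index_dict['ball'])   # 2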
Example 10
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import os
import time
import datetime
import pytz
import platform
import torch
import numpy as np

from utils.spec_reader import spec
TYPE_OF_RUN = spec.val("TYPE_OF_RUN")
ENV = spec.val("ENV")
LOAD_MODEL_FROM = spec.val("LOAD_MODEL_FROM")
SAVE_MODELS_TO = spec.val("SAVE_MODELS_TO")
ENV_RANDOM_SEED = spec.val("ENV_RANDOM_SEED")
AGENT_RANDOM_SEED = spec.val("AGENT_RANDOM_SEED")
REPORTING_INTERVAL = spec.val("REPORTING_INTERVAL")
TOTAL_STEPS = spec.val("TOTAL_STEPS")
ANNEAL_LR = spec.val("ANNEAL_LR")
if ANNEAL_LR:
    ANNEALING_START = spec.val("ANNEALING_START")

from agents.a3c import A3cAgent


class Worker(object):
    def __init__(self):
        torch.manual_seed(AGENT_RANDOM_SEED)
        self.start_time = time.time()
        self.heldout_testing = False