seed, wrapper_kwargs={ 'clip_rewards': False, 'episode_life': False, }) env = VecFrameStack(env, 4) agent = PPO2Agent(env, env_type, stochastic) demonstrations, learning_returns, learning_rewards = generate_mean_map_noop_demos( env) # Now we download a pretrained network to form \phi(s) the state features where the reward is now w^T \phi(s) print("loading policy", args.pretrained_network) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") reward_net = EmbeddingNet(args.encoding_dims) reward_net.load_state_dict( torch.load(args.pretrained_network, map_location=device)) #reinitialize last layer num_features = reward_net.fc2.in_features print("reward is linear combination of ", num_features, "features") reward_net.fc2 = nn.Linear( num_features, 1, bias=False) #last layer just outputs the scalar reward = w^T \phi(s) reward_net.to(device) #freeze all weights so there are no gradients (we'll manually update the last layer via proposals so no grads required) for param in reward_net.parameters(): param.requires_grad = False #get num_demos by num_features + 1 (bias) numpy array with (un-discounted) feature counts from pretrained network
# Reorder the demonstrations by ascending return (the sort key is the return
# only, so ties keep their original relative order).
ranked_pairs = sorted(zip(learning_returns, demonstrations),
                      key=lambda pair: pair[0])
demonstrations = [demo for _, demo in ranked_pairs]
sorted_returns = sorted(learning_returns)
print(sorted_returns)
print("lengths")
print([len(demo) for demo in demonstrations])

# Load a pretrained network to form \phi(s), the state features; the reward
# is then w^T \phi(s).
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

if args.trex:
    print("using TREX network from ICML")
    reward_net = Net()
else:
    reward_net = EmbeddingNet(args.encoding_dims)
# NOTE(review): the weights are loaded for both branches here; the collapsed
# source does not show whether this call was nested under `else:` -- confirm.
checkpoint_weights = torch.load(args.pretrained_network, map_location=device)
reward_net.load_state_dict(checkpoint_weights)

# fc2 is left as loaded in this script; only its input width is read.
num_features = reward_net.fc2.in_features
print("reward is linear combination of ", num_features, "features")
reward_net.to(device)

# Freeze all weights so there are no gradients (the last layer is updated
# manually via proposals, so no grads are required).
for param in reward_net.parameters():
    param.requires_grad = False

# Take the last "/"-separated path component as the network's file name, then
# keep the part before its first "." (i.e. before ".params_...").
directories = args.pretrained_network.split("/")
filename = directories[-1]
fname = filename.split(".")[0]

# get num_demos by num_features + 1 (bias) numpy array with (un-discounted)
# feature counts from pretrained network
demo_cnts = generate_feature_counts(demonstrations, reward_net)
# Commented-out reference definition, retained as found:
#
# class Net(nn.Module):
#     def __init__(self):
#         super().__init__()
#
#         self.conv1 = nn.Conv2d(4, 16, 7, stride=3)
#         self.conv2 = nn.Conv2d(16, 16, 5, stride=2)
#         self.conv3 = nn.Conv2d(16, 16, 3, stride=1)
#         self.conv4 = nn.Conv2d(16, 16, 3, stride=1)
#
#         # This is the width of the layer between the convolved framestack
#         # and the actual latent space. Scales with ENCODING_DIMS
#         intermediate_dimension = min(784, max(64, ENCODING_DIMS*2))
#
#         # Brings the convolved frame down to intermediate dimension just
#         # before being sent to latent space
#         self.fc1 = nn.Linear(784, intermediate_dimension)
#
#         # This brings from intermediate dimension to latent space. Named mu
#         # because in the full network it includes a var also, to sample for
#         # the autoencoder
#         self.fc_mu = nn.Linear(intermediate_dimension, ENCODING_DIMS)
#
#         # This is the actual T-REX layer; linear comb. from ENCODING_DIMS
#         self.fc2 = nn.Linear(ENCODING_DIMS, 1)

# Build a fresh EmbeddingNet and overwrite its state-dict entries with the
# entries of `model` that share a key; entries of `model` under any other key
# are dropped. (`model` is presumably a loaded state dict -- it is defined
# outside this excerpt.)
net = EmbeddingNet(ENCODING_DIMS)
sd = net.state_dict()
# Membership is tested against `sd` itself: it has exactly the keys of
# net.state_dict(), and this avoids rebuilding that dict for every key.
sd.update({k: v for k, v in model.items() if k in sd})

# Write the merged state dict to the path given as the second CLI argument.
torch.save(sd, sys.argv[2])
type=int, default=50000, help="how long to run before truncating policy") args = parser.parse_args() env_name = args.env_name #set seeds seed = int(args.seed) torch.manual_seed(seed) np.random.seed(seed) tf.set_random_seed(seed) network_file_loc = args.pretrained_network print("Using network at", network_file_loc, "for features.") device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") feature_net = EmbeddingNet(args.encoding_dims) state_dict = torch.load(network_file_loc, map_location=device) print(state_dict.keys()) feature_net.load_state_dict( torch.load(network_file_loc, map_location=device)) feature_net.to(device) print("evaluating", args.checkpointpath) print("*" * 10) print(env_name) print("*" * 10) returns, ave_feature_counts, fcounts, num_steps = get_policy_feature_counts( env_name, args.checkpointpath, feature_net, args.num_rollouts, args.max_length, args.no_op) print("returns", returns) print("feature counts", ave_feature_counts)
### This code will take in any pretrained network and compute the expected feature counts via Monte Carlo sampling according to the last
### layer of the pretrained network

import os
import sys
import pickle
import gym
import time
import numpy as np
import random
import torch
from run_test import *
#import matplotlib.pylab as plt
import argparse
from StrippedNet import EmbeddingNet
from baselines.common.trex_utils import preprocess
import utils

# Hard-coded location of the stripped pretrained checkpoint to inspect.
network_file_loc = "/home/dsbrown/Code/deep-bayesian-irl/pretrained_networks/auxloss/breakout_64_all.params_stripped.params"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
feature_net = EmbeddingNet(64)

# Read the checkpoint once; the same mapping is printed for inspection and
# then used to populate the network (previously the file was loaded twice).
state_dict = torch.load(network_file_loc, map_location=device)
print(state_dict.keys())
print(state_dict['fc2.bias'])
feature_net.load_state_dict(state_dict)