Python Actor.load_state_dict Examples

Programming Language: Python

Namespace/Package Name: Model

Class/Type: Actor

Method/Function: load_state_dict

Examples at hotexamples.com: 7

`load_state_dict` is a method of the `Actor` model class. It loads a state dictionary (the mapping that holds the model's parameters and buffers) into an existing model instance. Passing a previously saved state dictionary to this method restores the model to the same parameters and buffers it had when the dictionary was saved.

Python Actor.load_state_dict - 7 examples found. These are the top-rated real-world Python examples of Model.Actor.load_state_dict, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

eval(7)

load_state_dict(7)

Actor(3)

cuda(1)

Example #1

Show file

File: gdbp_falsification.py Project: jybai/MultiControllerAdapter

    def __init__(self,
                 base_model_paths,
                 switch_path,
                 device,
                 soft_choice=False):
        """Restore the base controllers and the switching network.

        Args:
            base_model_paths: Iterable of checkpoint paths; one ``Actor``
                is built and restored per path, in iteration order.
            switch_path: Checkpoint path for the ``DQN(2, 2)`` switch model.
            device: Device each network is moved to and onto which each
                checkpoint is mapped while loading.
            soft_choice: Stored unchanged as ``self.soft_choice``.
        """
        super(SwitchController, self).__init__()

        def _restore(network, checkpoint_path):
            # Move to `device`, load the saved weights, switch to eval mode.
            network = network.to(device)
            weights = torch.load(checkpoint_path, map_location=device)
            network.load_state_dict(weights)
            network.eval()
            return network

        self.base_models = [
            _restore(
                Actor(state_size=2, action_size=1, seed=0, fc1_units=25),
                checkpoint_path,
            )
            for checkpoint_path in base_model_paths
        ]
        self.switch_model = _restore(DQN(2, 2), switch_path)

        self.soft_choice = soft_choice

Example #2

Show file

# this file is to record the NN controller parameters into a txt file to be used 
# for Bernstein polynomial approximation by the tool of ReachNN
from Model import IndividualModel, Actor
import torch
import numpy as np


# NAME = 'direct_distill'
# trained_model = IndividualModel(state_size=3, action_size=1, seed=0, fc1_units=25)
# trained_model.load_state_dict(torch.load('./'+ NAME +'.pth'))
# trained_model.eval()
# Rebuild the controller network, restore its weights and switch to eval mode.
trained_model = Actor(state_size=3, action_size=1, seed=0, fc1_units=25)
trained_model.load_state_dict(torch.load("./actors/actor_0.43600.pth"))
trained_model.eval()

# Split the parameters into biases and weights (as NumPy arrays), keeping the
# order in which named_parameters() yields them.
bias_list = []
weight_list = []
for name, param in trained_model.named_parameters():
    values = param.detach().cpu().numpy()
    if 'bias' in name:
        bias_list.append(values)
    if 'weight' in name:
        weight_list.append(values)
print(len(weight_list), np.linalg.norm(weight_list[0]), np.linalg.norm(weight_list[1]))
# assert False

# Flatten layer by layer and row by row: every entry of a weight row,
# immediately followed by the bias at the same row index.
all_param = []
for layer_weights, layer_biases in zip(weight_list, bias_list):
    for row_weights, row_bias in zip(layer_weights, layer_biases):
        all_param.extend(row_weights)
        all_param.append(row_bias)

Example #3

Show file

USE_CUDA = torch.cuda.is_available()


def Variable(*args, **kwargs):
    """Build an ``autograd.Variable`` and move it to CUDA when available."""
    variable = autograd.Variable(*args, **kwargs)
    return variable.cuda() if USE_CUDA else variable


# Constants read by the rest of the script.
batch_size = 128
gamma = 0.99
epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 3000
replay_buffer = ReplayBuffer(int(5e3))


def epsilon_by_frame(frame_idx):
    """Return epsilon for ``frame_idx``.

    The value decays exponentially from ``epsilon_start`` (at frame 0)
    toward ``epsilon_final`` with scale ``epsilon_decay``.
    """
    decay = math.exp(-1. * frame_idx / epsilon_decay)
    return epsilon_final + (epsilon_start - epsilon_final) * decay

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved from a GPU run still loads on a CPU-only machine.
model_1 = Actor(state_size=3, action_size=1, seed=0, fc1_units=25).to(device)
model_1.load_state_dict(
    torch.load("./actors/actor_0.43600.pth", map_location=device))
model_1.eval()

# model_2 = IndividualModel(state_size=3, action_size=1, seed=0, fc1_units=50).to(device)
# model_2.load_state_dict(torch.load("./actors/actor_1.0_2800.pth"))
# model_2.eval()


def MController(state):
    """Evaluate the fixed polynomial control law on ``state``.

    Args:
        state: Indexable object; only ``state[0]``, ``state[1]`` and
            ``state[2]`` are read.

    Returns:
        ``0.634*s0 - 0.296*s1 - 0.153*s2 + 0.053*s0**2 - 1.215*s0**3``.
    """
    s0, s1, s2 = state[0], state[1], state[2]
    # Same left-to-right evaluation order as the single-expression form.
    linear_part = 0.634 * s0 - 0.296 * s1 - 0.153 * s2
    return linear_part + 0.053 * s0**2 - 1.215 * s0**3


# Build the IndividualModel network and place it on `device`.
Individual = IndividualModel(
    state_size=3,
    action_size=1,
    seed=0,
    fc1_units=25,
).to(device)

Example #4

Show file

File: adaptation.py Project: jybai/MultiControllerAdapter

		return len(self.buffer)

USE_CUDA = torch.cuda.is_available()


def Variable(*args, **kwargs):
    """Wrap the arguments in ``autograd.Variable``; use CUDA if available."""
    wrapped = autograd.Variable(*args, **kwargs)
    if USE_CUDA:
        return wrapped.cuda()
    return wrapped


# Script-wide constants.
batch_size = 128
gamma = 0.99
epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 3000
replay_buffer = ReplayBuffer(int(5e3))


def epsilon_by_frame(frame_idx):
    """Return the epsilon value scheduled for ``frame_idx``.

    Starts at ``epsilon_start`` for frame 0 and decays exponentially
    toward ``epsilon_final``; ``epsilon_decay`` sets the decay scale.
    """
    span = epsilon_start - epsilon_final
    return epsilon_final + span * math.exp(-1. * frame_idx / epsilon_decay)

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Both checkpoints are loaded with map_location=device so that files saved
# from a GPU run can still be restored on a CPU-only machine.
model_1 = Actor(state_size=2, action_size=1, seed=0, fc1_units=25, fc2_units=None).to(device)
model_1.load_state_dict(
    torch.load("./models/actor_2800.pth", map_location=device))
model_1.eval()

model_2 = Actor(state_size=2, action_size=1, seed=0, fc1_units=25).to(device)
model_2.load_state_dict(
    torch.load("./0731actors/actor_2400.pth", map_location=device))
model_2.eval()

# Individualtanh network, placed on `device`.
Individual = Individualtanh(
    state_size=2,
    action_size=1,
    seed=0,
    fc1_units=25,
).to(device)

# Agent built with both hidden-layer sizes set to None and weighted=True.
agent = Agent(
    state_size=2,
    action_size=2,
    random_seed=0,
    fc1_units=None,
    fc2_units=None,
    weighted=True,
)

# PPO instance using the 'clip' method; load_model(3000, 1) restores it.
ppo = PPO(2, 2, method='clip')
ppo.load_model(3000, 1)

def mkdir(path):
	folder = os.path.exists(path)

Example #5

Show file

File: adaptation.py Project: wangyixu14/MultiControllerAdapter

USE_CUDA = torch.cuda.is_available()


def Variable(*args, **kwargs):
    """Create an ``autograd.Variable``, on CUDA when it is available."""
    var = autograd.Variable(*args, **kwargs)
    return var.cuda() if USE_CUDA else var


# Constants shared by the rest of the script.
batch_size = 128
gamma = 0.99
epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 3000
replay_buffer = ReplayBuffer(int(5e3))


def epsilon_by_frame(frame_idx):
    """Return epsilon at ``frame_idx``.

    Exponential decay from ``epsilon_start`` toward ``epsilon_final``
    with scale ``epsilon_decay``.
    """
    decay_factor = math.exp(-1. * frame_idx / epsilon_decay)
    return epsilon_final + (epsilon_start - epsilon_final) * decay_factor

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# map_location=device lets checkpoints saved on a GPU be restored when only
# the CPU is available.
model_1 = Actor(state_size=4, action_size=1, seed=0).to(device)
model_1.load_state_dict(
    torch.load("./actor5000_1.pth", map_location=device))
model_1.eval()

model_2 = Actor(state_size=4, action_size=1, seed=0).to(device)
model_2.load_state_dict(
    torch.load("./actor4850_1.pth", map_location=device))
model_2.eval()

# Individualtanh network, placed on `device`.
Individual = Individualtanh(
    state_size=4,
    action_size=1,
    seed=0,
    fc1_units=50,
).to(device)

agent = Agent(state_size=4, action_size=2, random_seed=0)

# PPO instance using the 'penalty' method; load_model(5499, 1) restores it.
ppo = PPO(4, 2, method='penalty')
ppo.load_model(5499, 1)

Example #6

Show file

File: DDGP.py Project: tlalexander/GodotAIGym

class DDPG:
    """Actor-critic agent with online/target networks and Adam optimisers.

    The instance owns an online policy (``Actor``) and critic (``Critic``),
    a target copy of each, and one optimiser per online network. ``__init__``
    moves all four networks to CUDA.
    """

    def __init__(self,
                 env,
                 tau=1e-3,
                 gamma=0.99,
                 batch_size=64,
                 depsilon=50000):
        """Build the networks, optimisers and exploration schedule.

        Args:
            env: Object exposing ``observation_space.shape[0]`` and
                ``action_space.shape[0]``; these give the network sizes.
            tau: Interpolation factor used by ``soft_update``.
            gamma: Factor applied to the target Q value in ``train``.
            batch_size: Number of transitions sampled per ``train`` call.
            depsilon: ``epsilon`` starts at 1.0 and is lowered by
                ``1 / depsilon`` on every ``select_action`` call.
        """
        self.num_states = env.observation_space.shape[0]
        self.num_actions = env.action_space.shape[0]

        self.policy = Actor(self.num_states, self.num_actions).train()
        self.policy_target = Actor(self.num_states, self.num_actions).eval()
        self.hard_update(self.policy, self.policy_target)

        self.critic = Critic(self.num_states, self.num_actions).train()
        self.critic_target = Critic(self.num_states, self.num_actions).eval()
        self.hard_update(self.critic, self.critic_target)

        self.critic_loss = nn.MSELoss()

        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau
        self.epsilon = 1.0
        self.depsilon = 1.0 / float(depsilon)

        self.opt_critic = torch.optim.Adam(self.critic.parameters(), lr=1e-3)
        self.opt_policy = torch.optim.Adam(self.policy.parameters(), lr=1e-4)

        self.policy.cuda()
        self.policy_target.cuda()
        self.critic.cuda()
        self.critic_target.cuda()

    @staticmethod
    def _device_of(module):
        """Return the device holding ``module``'s first parameter."""
        return next(module.parameters()).device

    def _save_state_dict(self, module, file_path):
        """Save ``module``'s state dict from the CPU, then restore its device.

        ``nn.Module.to`` moves the module in place, so the module is moved
        back afterwards; otherwise it would stay on the CPU while its target
        network and optimiser state remain on the original device.
        """
        home_device = self._device_of(module)
        try:
            torch.save(module.to(device='cpu').state_dict(), file_path)
        finally:
            module.to(device=home_device)

    def train(self, buffer):
        """Run one critic update and one policy update on a sampled batch.

        Args:
            buffer: Object whose ``sample(batch_size)`` returns
                ``(state, action, reward, next_state, term)``.

        Returns:
            Tuple ``(critic_loss, policy_loss)`` as Python floats.
        """
        b_state, b_action, b_reward, b_state_next, b_term = buffer.sample(
            self.batch_size)
        # Target value comes from the target networks and carries no gradient.
        with torch.no_grad():
            action_target = self.policy_target(b_state_next)
            Q_prime = self.critic_target(b_state_next, action_target)

        self.opt_critic.zero_grad()
        Q = self.critic(b_state, b_action)
        # (1 - b_term) removes the bootstrapped term where b_term is 1.
        L_critic = self.critic_loss(
            Q, b_reward + self.gamma * Q_prime * (1.0 - b_term))
        L_critic.backward()
        self.opt_critic.step()

        self.opt_policy.zero_grad()
        action = self.policy(b_state)
        # Minimising -Q maximises the critic's value of the policy's action.
        # This backward pass also fills critic gradients; they are cleared by
        # opt_critic.zero_grad() at the start of the next call.
        L_Q = -1.0 * self.critic(b_state, action).mean()
        L_Q.backward()
        self.opt_policy.step()

        self.soft_update(self.critic, self.critic_target)
        self.soft_update(self.policy, self.policy_target)

        return L_critic.item(), L_Q.item()

    def get_entropy(self, buffer, m=5, n=100):
        """Return a spacing-based statistic of the policy's sorted actions.

        ``n`` random states ``(cos(angle), sin(angle), speed)`` are drawn,
        the policy's actions are sorted, and the mean log of scaled
        differences between actions ``m`` positions apart is returned.

        Args:
            buffer: Unused; kept for interface compatibility.
            m: Offset between the sorted actions that are differenced.
            n: Number of random states evaluated.

        Returns:
            The statistic as a Python float.
        """
        # b_state, b_action, b_reward, b_state_next, b_term = buffer.sample(n)
        b_angle = torch.rand(n) * np.pi * 2.0
        b_speed = 2.0 * (torch.rand(n) - 0.5) * 8.0
        # Follow the policy's device rather than hard-coding 'cuda'.
        b_state = torch.stack(
            [torch.cos(b_angle),
             torch.sin(b_angle), b_speed],
            dim=1).to(device=self._device_of(self.policy),
                      dtype=torch.float32)
        coef = torch.zeros(n, dtype=b_state.dtype, device=b_state.device)
        with torch.no_grad():
            action = self.policy(b_state)
            X, ind = torch.sort(action, dim=0)
            # NOTE(review): resembles an m-spacing entropy estimate, but the
            # factor c multiplies rather than divides - confirm the formula.
            for i in range(n):
                if i < m:
                    # Lower edge: the window is cut off at the first element.
                    c = 1
                    a = X[i + m]
                    b = X[0]
                elif i >= m and i < n - m:
                    c = 2
                    a = X[i + m]
                    b = X[i - m]
                else:
                    # Upper edge: the window is cut off at the last element.
                    c = 1
                    a = X[n - 1]
                    b = X[i - m]
                # 1E-5 keeps the argument of the log strictly positive.
                coef[i] = float(n) * float(c) / float(m) * (a - b + 1E-5)

            S = torch.log(coef).mean()

        return S.item()

    def get_value(self, state, action):
        """Return the critic's output for ``(state, action)`` as a float."""
        with torch.no_grad():
            return self.critic(state, action).item()

    def select_action(self, state, random_process):
        """Return the policy action plus epsilon-scaled noise, in [-1, 1].

        Args:
            state: Input passed to the policy network.
            random_process: Object whose ``sample()`` returns a NumPy array
                of noise.

        Returns:
            The noisy action tensor, clamped to ``[-1, 1]``.
        """
        with torch.no_grad():
            action = self.policy(state)
        # Noise is scaled by epsilon, floored at zero; epsilon then shrinks.
        noise = max(self.epsilon, 0.0) * random_process.sample()
        self.epsilon -= self.depsilon

        action += torch.from_numpy(noise).to(device=action.device,
                                             dtype=action.dtype)
        action = torch.clamp(action, -1, 1)
        return action

    def random_action(self):
        """Sample an action uniformly from ``[-1, 1)`` per action dimension."""
        m = Uniform(torch.tensor([-1.0] * self.num_actions),
                    torch.tensor([1.0] * self.num_actions))
        return m.sample()

    def soft_update(self, src, dst):
        """Blend ``src`` parameters into ``dst``: ``tau*src + (1-tau)*dst``."""
        with torch.no_grad():
            for src_param, dst_param in zip(src.parameters(),
                                            dst.parameters()):
                dst_param.copy_(self.tau * src_param +
                                (1.0 - self.tau) * dst_param)

    def hard_update(self, src, dst):
        """Copy every parameter of ``src`` into ``dst``."""
        with torch.no_grad():
            for src_param, dst_param in zip(src.parameters(),
                                            dst.parameters()):
                dst_param.copy_(src_param)

    def load_weights(self, path):
        """Load ``policy.pkl`` and ``critic.pkl`` from directory ``path``.

        Only the online policy and critic are restored; the target networks
        are left untouched. Stored tensors are mapped onto the device each
        network currently lives on.
        """
        self.policy.load_state_dict(
            torch.load('{}/policy.pkl'.format(path),
                       map_location=self._device_of(self.policy)))
        self.critic.load_state_dict(
            torch.load('{}/critic.pkl'.format(path),
                       map_location=self._device_of(self.critic)))

    def save_model(self, path):
        """Write ``policy.pkl`` and ``critic.pkl`` into directory ``path``.

        The state dicts are saved as CPU tensors. Both networks are back on
        their original device when this method returns, so training can
        continue after a save.
        """
        self._save_state_dict(self.policy, '{}/policy.pkl'.format(path))
        self._save_state_dict(self.critic, '{}/critic.pkl'.format(path))

Example #7

Show file

    num_train = 200000
    num_eval = 0
    buffer_length = 600000

    # env = NormalizedEnv(gym.make('Pendulum-v0'))
    GODOT_BIN_PATH = "InvPendulum/InvPendulum.x86_64"
    env_abs_path = "InvPendulum/InvPendulum.pck"
    env = NormalizedEnv(
        InvPendulumEnv(exec_path=GODOT_BIN_PATH,
                       env_path=env_abs_path,
                       render=True))

    num_states = env.observation_space.shape[0]
    num_actions = env.action_space.shape[0]
    policy = Actor(num_states, num_actions)
    policy.load_state_dict(torch.load('./policy.pkl'))

    state = env.reset()
    state = state.to(dtype=torch.float32)

    traced_policy = torch.jit.trace(policy, state)
    print(traced_policy.graph)
    print(traced_policy.code)
    traced_policy.save('ddpg_policy.jit')

    for step in range(1000):

        action = policy(state)
        #			torch.tensor([1.0 for i in range(num_actions)])).sample().to(device='cuda')
        time.sleep(0.02)
        # state_next, reward, term, _ = env.step(action.cpu().numpy())