for epoch_index, (epoch_number, weights_path) in enumerate(model_weight_paths):
    logger.info('Starting epoch: {}'.format(epoch_number))
    assert osp.exists(weights_path), \
        'path to weights: {} was not found'.format(weights_path)

    # load the checkpoint on CPU; some checkpoints wrap the weights in a 'model' key
    state_dict = torch.load(weights_path,
                            map_location=lambda storage, loc: storage)
    if 'model' in state_dict.keys():
        state_dict = state_dict['model']

    model.load_state_dict(state_dict, strict=True)
    model = model.to(device)
    model = model.eval()
    logger.info('weights loaded from path: {}'.format(weights_path))
    logger.info('for epoch: {}'.format(epoch_number))

    # estimate the full Hessian spectrum for this checkpoint
    Hess = FullHessian(crit='CrossEntropyLoss',
                       loader=loader,
                       device=device,
                       model=model,
                       num_classes=C,
                       hessian_type='Hessian',
                       init_poly_deg=64,
                       poly_deg=128,
                       spectrum_margin=0.05,
                       poly_points=1024,
                       SSI_iters=128)
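# --- Hypothetical helper for the loop above (not part of the original source). ---
# The loop assumes `model_weight_paths` is an iterable of (epoch_number, weights_path)
# pairs. Below is a minimal sketch of building that list from a checkpoint directory;
# the directory layout and the 'epoch_<N>.pt' filename pattern are assumptions.
import glob
import os.path as osp
import re


def collect_weight_paths(ckpt_dir, pattern='*.pt'):
    """Return (epoch_number, path) pairs sorted by epoch number."""
    pairs = []
    for path in glob.glob(osp.join(ckpt_dir, pattern)):
        match = re.search(r'epoch_(\d+)', osp.basename(path))
        if match:
            pairs.append((int(match.group(1)), path))
    return sorted(pairs)


# e.g. model_weight_paths = collect_weight_paths('checkpoints/')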
# netSubpixel = [Subpixel(intLevel) for intLevel in [2, 3, 4, 5, 6]]
# print()
# for s in netSubpixel:
#     for k, v in s.state_dict().items():
#         print(k + ': ' + str(v.shape))
#     print()

# netRegularization = [Regularization(intLevel) for intLevel in [2, 3, 4, 5, 6]]
# print()
# for r in netRegularization:
#     for k, v in r.state_dict().items():
#         print(k + ": " + str(v.shape))
#     print()

# print("----------------------------------------------------------")
# flownet = Network()
# for k, v in flownet.state_dict().items():
#     print(k + ": " + str(v.shape))

with dg.guard():
    flownet = Network()
    flownet.eval()
    tenFirst = dg.to_variable(np.zeros((1, 3, 1024, 1024)).astype("float32"))
    tenSecond = dg.to_variable(np.zeros((1, 3, 1024, 1024)).astype("float32"))
    out = flownet(tenFirst, tenSecond)
    print(out.shape)
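# --- Assumed imports for the smoke test above (not shown in the original snippet). ---
# `dg.guard()` and `dg.to_variable()` suggest that `dg` is PaddlePaddle's imperative
# (dygraph) API from the 1.x line, and `Network` is the flow model defined elsewhere
# in this file. Both lines below are assumptions, not taken from the source:
# import numpy as np
# import paddle.fluid.dygraph as dg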
class Agent():
    def __init__(self, state_size, action_size, seed):
        self.state_size = state_size
        self.action_size = action_size
        self.seed = seed
        random.seed(seed)

        # local (online) network Q and target network Q'
        self.Q = Network(self.state_size, self.action_size, self.seed)
        self.Q_dash = Network(self.state_size, self.action_size, self.seed)
        self.optimizer = optim.Adam(self.Q.parameters(), lr=LR)

        self.replay = ReplayBuffer(self.seed)
        self.t_step = 0

    def step(self, state, action, reward, next_state, done):
        self.replay.add(state, action, reward, next_state, done)
        # learn every UPDATE_EVERY steps, once enough samples are available
        self.t_step = (self.t_step + 1) % UPDATE_EVERY
        if self.t_step == 0:
            if len(self.replay) > BATCH_SIZE:
                experiences = self.replay.sample()
                self.learn_ddqn(experiences, GAMMA)

    def act(self, state, eps=0.):
        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        self.Q.eval()
        with torch.no_grad():  # no gradient tracking needed for action selection
            action_values = self.Q(state)
        self.Q.train()

        # Epsilon-greedy action selection
        if random.random() > eps:
            return np.argmax(action_values.cpu().data.numpy())
        else:
            return random.choice(np.arange(self.action_size))

    def learn_dqn(self, experiences, gamma):
        '''Simple DQN with a fixed target network Q' and experience replay.'''
        states, actions, rewards, next_states, dones = experiences

        # Get max predicted Q values (for next states) from the target model
        Q_targets_next = self.Q_dash(next_states).detach().max(1)[0].unsqueeze(1)
        # Compute Q targets for current states; no bootstrapping from terminal states
        Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))

        # Get expected Q values from the local model
        Q_expected = self.Q(states).gather(1, actions)

        # Compute loss
        loss = F.mse_loss(Q_expected, Q_targets)
        # Minimize the loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.soft_update(self.Q, self.Q_dash, TAU)

    def learn_ddqn(self, experiences, gamma):
        '''Double DQN: the local network picks the best action, the target network evaluates it.'''
        states, actions, rewards, next_states, dones = experiences

        # argmax action under the local network
        best_action_arg = self.Q(next_states).detach()
        a_best = best_action_arg.max(1)[1]
        # evaluate that action with the target network
        Q_targets_next = self.Q_dash(next_states).detach().gather(1, a_best.unsqueeze(1))
        # Q_targets_next = Q_targets_all[np.arange(BATCH_SIZE), a_best].unsqueeze(1)
        Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))

        # Get expected Q values from the local model
        Q_expected = self.Q(states).gather(1, actions)

        # Compute loss
        loss = F.mse_loss(Q_expected, Q_targets)
        # Minimize the loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.soft_update(self.Q, self.Q_dash, TAU)

    def soft_update(self, local_model, target_model, tau):
        """Soft update model parameters.

        θ_target = τ*θ_local + (1 - τ)*θ_target

        Params
        ======
            local_model (PyTorch model): weights will be copied from
            target_model (PyTorch model): weights will be copied to
            tau (float): interpolation parameter
        """
        for target_param, local_param in zip(target_model.parameters(),
                                             local_model.parameters()):
            target_param.data.copy_(tau * local_param.data +
                                    (1.0 - tau) * target_param.data)
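# --- Hypothetical support code for the Agent above (not from the original source). ---
# The class relies on hyperparameter constants and a ReplayBuffer that are defined
# elsewhere. The sketch below shows one plausible set of definitions; all names are
# taken from the Agent, but every value and the buffer implementation are assumptions.
import random
from collections import deque, namedtuple

import numpy as np
import torch

BUFFER_SIZE = int(1e5)   # replay buffer capacity (assumed)
BATCH_SIZE = 64          # minibatch size (assumed)
GAMMA = 0.99             # discount factor (assumed)
TAU = 1e-3               # soft-update interpolation factor (assumed)
LR = 5e-4                # learning rate (assumed)
UPDATE_EVERY = 4         # learn every N environment steps (assumed)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Experience = namedtuple("Experience",
                        ["state", "action", "reward", "next_state", "done"])


class ReplayBuffer:
    """Fixed-size buffer that stores experience tuples and samples them uniformly."""

    def __init__(self, seed, buffer_size=BUFFER_SIZE, batch_size=BATCH_SIZE):
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        self.memory.append(Experience(state, action, reward, next_state, done))

    def sample(self):
        experiences = random.sample(self.memory, k=self.batch_size)
        states = torch.from_numpy(np.vstack([e.state for e in experiences])).float().to(device)
        actions = torch.from_numpy(np.vstack([e.action for e in experiences])).long().to(device)
        rewards = torch.from_numpy(np.vstack([e.reward for e in experiences])).float().to(device)
        next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences])).float().to(device)
        dones = torch.from_numpy(np.vstack([e.done for e in experiences]).astype(np.uint8)).float().to(device)
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.memory)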
def run(test_dir,
        test_srcs,
        checkpoint,
        vocab,
        out="captions.out.txt",
        batch_size=16,
        max_seq_len=MAX_LEN,
        hidden_dim=HIDDEN_DIM,
        emb_dim=EMB_DIM,
        enc_seq_len=ENC_SEQ_LEN,
        enc_dim=ENC_DIM,
        attn_activation="relu",
        deep_out=False,
        decoder=4,
        attention=3):
    # map the integer flags to the corresponding decoder / attention classes
    if decoder == 1:
        decoder = models.AttentionDecoder_1
    elif decoder == 2:
        decoder = models.AttentionDecoder_2
    elif decoder == 3:
        decoder = models.AttentionDecoder_3
    elif decoder == 4:
        decoder = models.AttentionDecoder_4

    if attention == 1:
        attention = attentions.AdditiveAttention
    elif attention == 2:
        attention = attentions.GeneralAttention
    elif attention == 3:
        attention = attentions.ScaledGeneralAttention

    # load vocabulary
    vocabulary = Vocab()
    vocabulary.load(vocab)

    # load test instance file paths
    srcs = open(test_srcs).read().strip().split('\n')
    srcs = [os.path.join(test_dir, s) for s in srcs]

    # build the model and restore the checkpoint
    net = Network(hid_dim=hidden_dim,
                  out_dim=vocabulary.n_words,
                  sos_token=0,
                  eos_token=1,
                  pad_token=2,
                  emb_dim=emb_dim,
                  enc_seq_len=enc_seq_len,
                  enc_dim=enc_dim,
                  deep_out=deep_out,
                  attention=attention,
                  decoder=decoder)
    net.to(DEVICE)
    net.load_state_dict(torch.load(checkpoint))
    net.eval()

    with torch.no_grad():
        # run inference batch by batch
        num_instances = len(srcs)
        i = 0
        captions = []
        while i < num_instances:
            srcs_batch = srcs[i:i + batch_size]
            batch = _load_batch(srcs_batch)
            batch = batch.to(DEVICE)
            tokens, _ = net(batch, targets=None, max_len=max_seq_len)
            tokens = tokens.permute(1, 0, 2).detach()
            _, topi = tokens.topk(1, dim=2)
            topi = topi.squeeze(2)

            # decode token output from the model
            for j in range(len(srcs_batch)):
                c = vocabulary.tensor_to_sentence(topi[j])
                c = ' '.join(c)
                captions.append(c)

            i += len(srcs_batch)

    # write one caption per line
    with open(out, mode='w') as out_f:
        for c in captions:
            out_f.write(c + '\n')
    return
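# --- Hypothetical invocation of run() above; every path below is a placeholder, not from the original. ---
if __name__ == '__main__':
    run(test_dir='data/test',                 # directory containing the test instances (assumed layout)
        test_srcs='data/test_sources.txt',    # one source filename per line (assumed)
        checkpoint='checkpoints/model.pt',    # trained weights for Network (assumed)
        vocab='checkpoints/vocab.json',       # serialized Vocab, loaded via Vocab.load (assumed format)
        out='captions.out.txt',
        batch_size=16)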
def get_tensors(x):
    return torch.tensor(x, dtype=torch.float32).unsqueeze(0)


if __name__ == '__main__':
    args = get_args()
    # create the environment
    env = gym.make(args.env_name)
    # build up the network
    net = Network(env.observation_space.shape[0], env.action_space.shape[0])
    # load the saved model
    model_path = args.save_dir + args.env_name + '/model.pt'
    network_model, filters = torch.load(model_path,
                                        map_location=lambda storage, loc: storage)
    net.load_state_dict(network_model)
    net.eval()
    for _ in range(10):
        obs = denormalize(env.reset(), filters.rs.mean, filters.rs.std)
        reward_total = 0
        for _ in range(10000):
            env.render()
            obs_tensor = get_tensors(obs)
            with torch.no_grad():
                _, (mean, _) = net(obs_tensor)
            action = mean.numpy().squeeze()
            obs, reward, done, _ = env.step(action)
            reward_total += reward
            obs = denormalize(obs, filters.rs.mean, filters.rs.std)
            if done:
                break
        print('the reward of this episode is: {}'.format(reward_total))
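# --- Hypothetical helpers assumed by the evaluation script above (not from the original source). ---
# Despite its name, `denormalize` is applied to raw observations before the policy network,
# so it presumably standardizes them with the running mean/std collected during training.
# A plausible sketch follows; the clip range, argument defaults, and get_args flags are assumptions.
import argparse

import numpy as np


def denormalize(x, mean, std, clip=10.0, eps=1e-8):
    # standardize the observation with the stored running statistics
    x = (x - mean) / (std + eps)
    return np.clip(x, -clip, clip)


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--env-name', type=str, default='Walker2d-v2',
                        help='gym environment to evaluate (assumed default)')
    parser.add_argument('--save-dir', type=str, default='saved_models/',
                        help='directory containing <env_name>/model.pt (assumed layout)')
    return parser.parse_args()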