for epoch_index, (epoch_number,
                      weights_path) in enumerate(model_weight_paths):

        logger.info('Starting epoch: {}'.format(epoch_number))

        assert osp.exists(weights_path), \
            'path to weights: {} was not found'.format(weights_path)
        # map_location keeps all tensors on CPU regardless of where they were saved
        state_dict = torch.load(weights_path,
                                map_location=lambda storage, loc: storage)
        # some checkpoints wrap the weights under a 'model' key
        if 'model' in state_dict:
            state_dict = state_dict['model']
        model.load_state_dict(state_dict, strict=True)
        model = model.to(device)
        model.eval()
        logger.info('weights loaded from path: {} for epoch: {}'.format(
            weights_path, epoch_number))

        Hess = FullHessian(crit='CrossEntropyLoss',
                           loader=loader,
                           device=device,
                           model=model,
                           num_classes=C,
                           hessian_type='Hessian',
                           init_poly_deg=64,
                           poly_deg=128,
                           spectrum_margin=0.05,
                           poly_points=1024,
                           SSI_iters=128)
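        # judging by the parameter names, FullHessian estimates the eigenvalue
        # spectrum of the loss Hessian for this checkpoint; the polynomial
        # degrees, margin and number of points control the spectral approximation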

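    # smoke test: run two zero-filled frames through the network to verify the
    # forward pass (dg is assumed to be paddle.fluid.dygraph, np is numpy)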
    with dg.guard():
        flownet = Network()
        flownet.eval()
        tenFirst = dg.to_variable(
            np.zeros((1, 3, 1024, 1024)).astype("float32"))
        tenSecond = dg.to_variable(
            np.zeros((1, 3, 1024, 1024)).astype("float32"))
        out = flownet(tenFirst, tenSecond)
        print(out.shape)
import random

import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim

# Network, ReplayBuffer and the hyperparameters (LR, BATCH_SIZE, GAMMA,
# UPDATE_EVERY, TAU, device) are assumed to be defined elsewhere.


class Agent:
    def __init__(self, state_size, action_size, seed):

        self.state_size = state_size
        self.action_size = action_size
        self.seed = seed
        random.seed(seed)  # random.seed() returns None; store the seed itself

        # online network Q and fixed-target network Q'
        self.Q = Network(self.state_size, self.action_size, self.seed)
        self.Q_dash = Network(self.state_size, self.action_size, self.seed)

        self.optimizer = optim.Adam(self.Q.parameters(), lr=LR)

        self.replay = ReplayBuffer(self.seed)
        self.t_step = 0

    def step(self, state, action, reward, next_state, done):
        self.replay.add(state, action, reward, next_state, done)
        # learn every UPDATE_EVERY steps, once enough samples are buffered
        self.t_step = (self.t_step + 1) % UPDATE_EVERY

        if self.t_step == 0:
            if len(self.replay) > BATCH_SIZE:
                experiences = self.replay.sample()
                self.learn_ddqn(experiences, GAMMA)

    def act(self, state, eps=0.):

        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        self.Q.eval()
        with torch.no_grad():
            # no gradient tracking is needed for action selection
            action_values = self.Q(state)
        self.Q.train()

        # Epsilon-greedy action selection
        if random.random() > eps:
            return np.argmax(action_values.cpu().data.numpy())
        else:
            return random.choice(np.arange(self.action_size))

    def learn_dqn(self, experiences, gamma):
        '''
        Plain DQN update with a fixed target network Q' and experience replay.
        '''
        states, actions, rewards, next_states, dones = experiences

        # Get max predicted Q values for the next states from the target network
        Q_targets_next = self.Q_dash(next_states).detach().max(1)[0].unsqueeze(1)
        # Compute TD targets: bootstrap from the next state only when the
        # episode did not terminate (dones is 0/1)
        Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))

        # Get expected Q values from local model
        Q_expected = self.Q(states).gather(1, actions)

        # Compute loss
        loss = F.mse_loss(Q_expected, Q_targets)
        # Minimize the loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.soft_update(self.Q, self.Q_dash, TAU)

    def learn_ddqn(self, experiences, gamma):
        '''
        Double DQN update: the online network selects the greedy next action
        and the target network evaluates it, reducing the overestimation bias
        of plain DQN.
        '''
        states, actions, rewards, next_states, dones = experiences

        # online network picks the greedy action for each next state
        a_best = self.Q(next_states).detach().max(1)[1]
        # target network evaluates the selected actions
        Q_targets_next = self.Q_dash(next_states).detach().gather(
            1, a_best.unsqueeze(1))
        Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))
        # Get expected Q values from local model
        Q_expected = self.Q(states).gather(1, actions)
        # Compute loss
        loss = F.mse_loss(Q_expected, Q_targets)
        # Minimize the loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.soft_update(self.Q, self.Q_dash, TAU)

    def soft_update(self, local_model, target_model, tau):
        """Soft update model parameters.
        θ_target = τ*θ_local + (1 - τ)*θ_target

        Params
        ======
            local_model (PyTorch model): weights will be copied from
            target_model (PyTorch model): weights will be copied to
            tau (float): interpolation parameter 
        """
        for target_param, local_param in zip(target_model.parameters(),
                                             local_model.parameters()):
            target_param.data.copy_(tau * local_param.data +
                                    (1.0 - tau) * target_param.data)
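

# A minimal usage sketch (not part of the original source): how Agent.act and
# Agent.step fit into a Gym-style training loop. The epsilon schedule and
# episode count below are hypothetical.
def train(env, n_episodes=500, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    agent = Agent(env.observation_space.shape[0], env.action_space.n, seed=0)
    eps = eps_start
    scores = []
    for _ in range(n_episodes):
        state = env.reset()
        score, done = 0.0, False
        while not done:
            action = agent.act(state, eps)
            next_state, reward, done, _ = env.step(action)
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
        scores.append(score)
        eps = max(eps_end, eps_decay * eps)
    return scores
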
def run(test_dir,
        test_srcs,
        checkpoint,
        vocab,
        out="captions.out.txt",
        batch_size=16,
        max_seq_len=MAX_LEN,
        hidden_dim=HIDDEN_DIM,
        emb_dim=EMB_DIM,
        enc_seq_len=ENC_SEQ_LEN,
        enc_dim=ENC_DIM,
        attn_activation="relu",
        deep_out=False,
        decoder=4,
        attention=3):

    if decoder == 1:
        decoder = models.AttentionDecoder_1
    elif decoder == 2:
        decoder = models.AttentionDecoder_2
    elif decoder == 3:
        decoder = models.AttentionDecoder_3
    elif decoder == 4:
        decoder = models.AttentionDecoder_4
    else:
        raise ValueError('decoder must be in 1..4, got {}'.format(decoder))

    if attention == 1:
        attention = attentions.AdditiveAttention
    elif attention == 2:
        attention = attentions.GeneralAttention
    elif attention == 3:
        attention = attentions.ScaledGeneralAttention
    else:
        raise ValueError('attention must be in 1..3, got {}'.format(attention))

    # load vocabulary
    vocabulary = Vocab()
    vocabulary.load(vocab)

    # load test instance file paths
    with open(test_srcs) as f:
        srcs = f.read().strip().split('\n')
    srcs = [os.path.join(test_dir, s) for s in srcs]

    # load model
    net = Network(hid_dim=hidden_dim,
                  out_dim=vocabulary.n_words,
                  sos_token=0,
                  eos_token=1,
                  pad_token=2,
                  emb_dim=emb_dim,
                  enc_seq_len=enc_seq_len,
                  enc_dim=enc_dim,
                  deep_out=deep_out,
                  attention=attention,
                  decoder=decoder)
    net.to(DEVICE)

    net.load_state_dict(torch.load(checkpoint, map_location=DEVICE))

    net.eval()

    with torch.no_grad():

        # run inference
        num_instances = len(srcs)
        i = 0
        captions = []
        while i < num_instances:
            srcs_batch = srcs[i:i + batch_size]
            batch = _load_batch(srcs_batch)
            batch = batch.to(DEVICE)

            tokens, _ = net(batch, targets=None, max_len=max_seq_len)
            tokens = tokens.permute(1, 0, 2).detach()
            _, topi = tokens.topk(1, dim=2)
            topi = topi.squeeze(2)

            # decode token output from the model
            for j in range(len(srcs_batch)):
                c = vocabulary.tensor_to_sentence(topi[j])
                c = ' '.join(c)
                captions.append(c)

            i += len(srcs_batch)

    # write one caption per line
    with open(out, mode='w') as out_f:
        for c in captions:
            out_f.write(c + '\n')
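
# Hypothetical invocation of run() (all paths below are placeholders; the
# vocabulary file format depends on Vocab.load):
# run(test_dir='data/test', test_srcs='data/test_srcs.txt',
#     checkpoint='checkpoints/model.pt', vocab='vocab.txt',
#     out='captions.out.txt', batch_size=32)
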
import gym
import torch

# get_args, Network and denormalize are assumed to be defined in this module
def get_tensors(x):
    # add a batch dimension: (obs_dim,) -> (1, obs_dim)
    return torch.tensor(x, dtype=torch.float32).unsqueeze(0)


if __name__ == '__main__':
    args = get_args()
    # create the environment
    env = gym.make(args.env_name)
    # build up the network
    net = Network(env.observation_space.shape[0], env.action_space.shape[0])
    # load the saved model
    model_path = args.save_dir + args.env_name + '/model.pt'
    network_model, filters = torch.load(
        model_path, map_location=lambda storage, loc: storage)
    net.load_state_dict(network_model)
    net.eval()
    # run 10 evaluation episodes
    for _ in range(10):
        obs = denormalize(env.reset(), filters.rs.mean, filters.rs.std)
        reward_total = 0
        for _ in range(10000):
            env.render()
            obs_tensor = get_tensors(obs)
            with torch.no_grad():
                # act deterministically with the mean of the policy distribution
                _, (mean, _) = net(obs_tensor)
                action = mean.numpy().squeeze()
            obs, reward, done, _ = env.step(action)
            reward_total += reward
            obs = denormalize(obs, filters.rs.mean, filters.rs.std)
            if done:
                break
        print('the reward of this episode is: {}'.format(reward_total))