Example 1
def main():
    args = get_args()

    data_dir = "../data/"
    ## data preparation
    _, valid_loader = data.load_data(data_dir=data_dir,
                                     input_size=224,
                                     batch_size=args.batch_size,
                                     augmentation=args.augmentation)
    print('Computing t-SNE embedding')
    tsne = TSNE(n_components=2)
    t0 = time()
    pretrained_model = Network(20).to(args.device)
    pretrained_model.load_state_dict(torch.load('tsne.pt'))
    outputs = []
    label_list = []
    for inputs, labels in valid_loader:
        inputs = inputs.to(args.device)
        output = forward(pretrained_model, inputs)
        outputs.append(output.cpu().detach().numpy().astype(np.float64))
        label_list.append(labels)
    output = np.concatenate(outputs, axis=0)
    labels = np.concatenate(label_list, axis=0)
    result = tsne.fit_transform(output)

    plot_embedding(
        result, labels,
        't-SNE embedding of the 20 classes (time %.2fs)' % (time() - t0))
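
The plot_embedding helper called above is project code that is not included here. A minimal sketch of such a helper, assuming a 2-D embedding and integer class labels (the name matches the call above, but the behaviour below is an assumption, not the original implementation):

import matplotlib.pyplot as plt


def plot_embedding(result, labels, title):
    # rescale the 2-D embedding to [0, 1] so plots are comparable across runs
    result = (result - result.min(0)) / (result.max(0) - result.min(0))
    plt.figure(figsize=(8, 8))
    plt.scatter(result[:, 0], result[:, 1], c=labels, cmap='tab20', s=8)
    plt.title(title)
    plt.savefig('tsne.png')
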
Example 2
def main():
    # random seed
    seed = 1234
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # load dataset
    if args.dataset[0] == 'deepfashion':
        ds = pd.read_csv('./Anno/df_info.csv')
        from dataset import DeepFashionDataset as DataManager
    elif args.dataset[0] == 'fld':
        ds = pd.read_csv('./Anno/fld_info.csv')
        from dataset import FLDDataset as DataManager
    else:
        raise ValueError('unknown dataset: {}'.format(args.dataset[0]))

    print('dataset : %s' % (args.dataset[0]))
    if not args.evaluate:
        train_dm = DataManager(ds[ds['evaluation_status'] == 'train'],
                               root=args.root)
        train_dl = DataLoader(train_dm,
                              batch_size=args.batchsize,
                              shuffle=True)

        if not os.path.exists('models'):
            os.makedirs('models')

    test_dm = DataManager(ds[ds['evaluation_status'] == 'test'],
                          root=args.root)
    test_dl = DataLoader(test_dm, batch_size=args.batchsize, shuffle=False)

    # Load model
    print("Load the model...")
    net = Network(dataset=args.dataset, flag=args.glem).cuda()
    if args.weight_file is not None:
        weights = torch.load(args.weight_file)
        if args.update_weight:
            weights = utils.load_weight(net, weights)
        net.load_state_dict(weights)

    # evaluate only
    if args.evaluate:
        print("Evaluation only")
        test(net, test_dl, 0)
        return

    # learning parameters
    optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 5, 0.1)

    print('Start training')
    for epoch in range(args.epoch):
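        # note: PyTorch 1.1+ expects lr_scheduler.step() to be called after the optimizer updates (i.e. after train)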
        lr_scheduler.step()
        train(net, optimizer, train_dl, epoch)
        test(net, test_dl, epoch)
Example 3
    model_weight_paths = get_model_weight_paths(ckpt_dir, args.num)

    for epoch_index, (epoch_number,
                      weights_path) in enumerate(model_weight_paths):

        logger.info('Starting epoch: {}'.format(epoch_number))

        assert osp.exists(
            weights_path), 'path to weights: {} was not found'.format(
                weights_path)
        state_dict = torch.load(weights_path,
                                map_location=lambda storage, loc: storage)
        if 'model' in state_dict.keys():
            state_dict = state_dict['model']
        model.load_state_dict(state_dict, strict=True)
        model = model.to(device)

        model = model.eval()
        logger.info('weights loaded from path: {}'.format(weights_path))
        logger.info('for epoch: {}'.format(epoch_number))

        Hess = FullHessian(crit='CrossEntropyLoss',
                           loader=loader,
                           device=device,
                           model=model,
                           num_classes=C,
                           hessian_type='Hessian',
                           init_poly_deg=64,
                           poly_deg=128,
                           spectrum_margin=0.05,
Example 4
# df = pd.read_csv(results_path)

# First row in CSV, which contains different parameters
# row = df.iloc[0]

# In[4]:

#%% Network

# Initialize network
model = Network().construct(net, row)
model = model.eval()

# Load trained model
state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)
model.load_state_dict(state_dict, strict=False)
model = model.to(device)

gpus = torch.cuda.device_count()
if gpus > 1:
    print("Let's use", gpus, "GPUs!")
    model = nn.DataParallel(model, device_ids=range(gpus))

# In[5]:

#%% Dataset

# Transform
mean, std = get_mean_std(dataset)
pad = int((row.padded_im_size - row.im_size) / 2)
transform = transforms.Compose([
Example 5
class trpo_agent:
    def __init__(self, env, args):
        self.env = env
        self.args = args

        # define the network
        self.net = Network(self.env.observation_space.shape[0],
                           self.env.action_space.shape[0])
        self.old_net = Network(self.env.observation_space.shape[0],
                               self.env.action_space.shape[0])

        # make sure the net and old net have the same parameters
        self.old_net.load_state_dict(self.net.state_dict())

        # define the optimizer
        self.optimizer = torch.optim.Adam(self.net.critic.parameters(),
                                          lr=self.args.lr)

        # define the running mean filter
        self.running_state = ZFilter((self.env.observation_space.shape[0], ),
                                     clip=5)

        if not os.path.exists(self.args.save_dir):
            os.mkdir(self.args.save_dir)
        self.model_path = self.args.save_dir + self.args.env_name
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)

        self.start_episode = 0

    def learn(self):

        # configuration
        USER_SAVE_DATE = '3006'
        USER_SAVE_MODEL = 'mymodel.pt'
        CONTINUE_TRAINING = False  # False for new training, True for improving the existing model
        num_of_iteration = 0

        # paths
        date = USER_SAVE_DATE
        plot_path = self.model_path + '/' + date + '/plots/plot_'
        best_model_path = self.model_path + '/' + date + '/best/'
        all_model_path = self.model_path + '/' + date
        reward_path = self.model_path + '/' + date + '/rewards/'

        load_model = CONTINUE_TRAINING
        best_model = all_model_path + '/' + USER_SAVE_MODEL
        all_final_rewards = []

        num_updates = 1000000
        obs = self.running_state(self.env.reset())

        final_reward = 0
        episode_reward = 0
        self.dones = False

        # Load the best model for continuing training
        if load_model:
            print("=> Loading checkpoint...")
            checkpoint = torch.load(best_model)
            self.start_episode = checkpoint['update']
            self.net.load_state_dict(checkpoint['state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.running_state = checkpoint['running_state']
            final_reward = checkpoint['reward']
            all_final_rewards.append(final_reward)
            #print("=> loaded checkpoint (Episode: {}, reward: {})".format(checkpoint['update'], final_reward))

        for update in range(self.start_episode, num_updates):
            mb_obs, mb_rewards, mb_actions, mb_dones, mb_values = [], [], [], [], []
            for step in range(self.args.nsteps):
                with torch.no_grad():
                    obs_tensor = self._get_tensors(obs)
                    value, pi = self.net(obs_tensor)
                # select actions
                actions = select_actions(pi)
                # store information
                mb_obs.append(np.copy(obs))
                mb_actions.append(actions)
                mb_dones.append(self.dones)
                mb_values.append(value.detach().numpy().squeeze())
                # start to execute actions in the environment
                obs_, reward, done, _ = self.env.step(actions)
                self.dones = done
                mb_rewards.append(reward)
                if done:
                    obs_ = self.env.reset()
                obs = self.running_state(obs_)
                episode_reward += reward
                mask = 0.0 if done else 1.0
                final_reward *= mask
                final_reward += (1 - mask) * episode_reward
                episode_reward *= mask
            # to process the rollouts
            mb_obs = np.asarray(mb_obs, dtype=np.float32)
            mb_rewards = np.asarray(mb_rewards, dtype=np.float32)
            mb_actions = np.asarray(mb_actions, dtype=np.float32)
            mb_dones = np.asarray(mb_dones, dtype=bool)
            mb_values = np.asarray(mb_values, dtype=np.float32)
            # compute the last state value
            with torch.no_grad():
                obs_tensor = self._get_tensors(obs)
                last_value, _ = self.net(obs_tensor)
                last_value = last_value.detach().numpy().squeeze()
            # compute the advantages
            mb_returns = np.zeros_like(mb_rewards)
            mb_advs = np.zeros_like(mb_rewards)
            lastgaelam = 0
            for t in reversed(range(self.args.nsteps)):
                if t == self.args.nsteps - 1:
                    nextnonterminal = 1.0 - self.dones
                    nextvalues = last_value
                else:
                    nextnonterminal = 1.0 - mb_dones[t + 1]
                    nextvalues = mb_values[t + 1]
                delta = (mb_rewards[t]
                         + self.args.gamma * nextvalues * nextnonterminal
                         - mb_values[t])
                mb_advs[t] = lastgaelam = (
                    delta
                    + self.args.gamma * self.args.tau * nextnonterminal * lastgaelam)
            mb_returns = mb_advs + mb_values
            # normalize the advantages
            mb_advs = (mb_advs - mb_advs.mean()) / (mb_advs.std() + 1e-5)
            # before the update, make sure the old network has the same parameters as the current network
            self.old_net.load_state_dict(self.net.state_dict())
            # start to update the network
            policy_loss, value_loss = self._update_network(
                mb_obs, mb_actions, mb_returns, mb_advs)
            #torch.save([self.net.state_dict(), self.running_state], self.model_path + 'model.pt')

            print('Episode: {} / {}, Iteration: {}, Reward: {:.3f}'.format(
                update, num_updates, (update + 1) * self.args.nsteps,
                final_reward))

            all_final_rewards.append(final_reward.item())
            self.save_model_for_training(update,
                                         final_reward.item(),
                                         filepath=best_model_path +
                                         str(round(final_reward.item(), 2)) +
                                         '_' + str(update) + '.pt')

            torch.save([self.net.state_dict(), self.running_state],
                       self.model_path + "/" + date + "/" +
                       str(round(final_reward.item(), 2)) + str(update) +
                       '_testing' + ".pt")

            if update % self.args.display_interval == 0:
                fig = plt.figure()
                ax = fig.add_subplot(111)
                plt.plot(np.arange(len(all_final_rewards)), all_final_rewards)
                plt.ylabel('Reward')
                plt.xlabel('Episode #')
                plt.savefig(plot_path + str(update) + '.png')
                plt.close(fig)
                reward_df = pd.DataFrame(all_final_rewards)
                with open(reward_path + 'rewards.csv', 'a') as f:
                    reward_df.to_csv(f, header=False)

    def save_model_for_training(self, num_of_iteration, reward, filepath):
        checkpoint = {
            'update': num_of_iteration,
            'state_dict': self.net.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'running_state': self.running_state,
            'reward': reward
        }
        torch.save(checkpoint, filepath)

    # start to update network
    def _update_network(self, mb_obs, mb_actions, mb_returns, mb_advs):
        mb_obs_tensor = torch.tensor(mb_obs, dtype=torch.float32)
        mb_actions_tensor = torch.tensor(mb_actions, dtype=torch.float32)
        mb_returns_tensor = torch.tensor(mb_returns,
                                         dtype=torch.float32).unsqueeze(1)
        mb_advs_tensor = torch.tensor(mb_advs,
                                      dtype=torch.float32).unsqueeze(1)
        # try to get the old policy and current policy
        values, _ = self.net(mb_obs_tensor)
        with torch.no_grad():
            _, pi_old = self.old_net(mb_obs_tensor)
        # get the surr loss
        surr_loss = self._get_surrogate_loss(mb_obs_tensor, mb_advs_tensor,
                                             mb_actions_tensor, pi_old)
        # compute the surrogate gradient -> g, Ax = g, where A is the Fisher information matrix
        surr_grad = torch.autograd.grad(surr_loss, self.net.actor.parameters())
        flat_surr_grad = torch.cat([grad.view(-1) for grad in surr_grad]).data
        # use the conjugate gradient method to calculate the scaled direction vector (natural gradient)
        nature_grad = conjugated_gradient(self._fisher_vector_product,
                                          -flat_surr_grad, 10, mb_obs_tensor,
                                          pi_old)
        # calculate the scaling ratio
        non_scale_kl = 0.5 * (nature_grad * self._fisher_vector_product(
            nature_grad, mb_obs_tensor, pi_old)).sum(0, keepdim=True)
        scale_ratio = torch.sqrt(non_scale_kl / self.args.max_kl)
        final_nature_grad = nature_grad / scale_ratio[0]
        # calculate the expected improvement rate...
        expected_improve = (-flat_surr_grad * nature_grad).sum(
            0, keepdim=True) / scale_ratio[0]
        # get the flat param ...
        prev_params = torch.cat(
            [param.data.view(-1) for param in self.net.actor.parameters()])
        # start to do the line search
        success, new_params = line_search(self.net.actor, self._get_surrogate_loss, prev_params, final_nature_grad, \
                                expected_improve, mb_obs_tensor, mb_advs_tensor, mb_actions_tensor, pi_old)
        set_flat_params_to(self.net.actor, new_params)
        # then update the critic network
        inds = np.arange(mb_obs.shape[0])
        for _ in range(self.args.vf_itrs):
            np.random.shuffle(inds)
            for start in range(0, mb_obs.shape[0], self.args.batch_size):
                end = start + self.args.batch_size
                mbinds = inds[start:end]
                mini_obs = mb_obs[mbinds]
                mini_returns = mb_returns[mbinds]
                # put things in the tensor
                mini_obs = torch.tensor(mini_obs, dtype=torch.float32)
                mini_returns = torch.tensor(mini_returns,
                                            dtype=torch.float32).unsqueeze(1)
                values, _ = self.net(mini_obs)
                v_loss = (mini_returns - values).pow(2).mean()
                self.optimizer.zero_grad()
                v_loss.backward()
                self.optimizer.step()
        return surr_loss.item(), v_loss.item()

    # get the surrogate loss
    def _get_surrogate_loss(self, obs, adv, actions, pi_old):
        _, pi = self.net(obs)
        log_prob = eval_actions(pi, actions)
        old_log_prob = eval_actions(pi_old, actions).detach()
        surr_loss = -torch.exp(log_prob - old_log_prob) * adv
        return surr_loss.mean()

    # the product of the Fisher information matrix and the natural gradient -> Ax
    def _fisher_vector_product(self, v, obs, pi_old):
        kl = self._get_kl(obs, pi_old)
        kl = kl.mean()
        # start to calculate the second order gradient of the KL
        kl_grads = torch.autograd.grad(kl,
                                       self.net.actor.parameters(),
                                       create_graph=True)
        flat_kl_grads = torch.cat([grad.view(-1) for grad in kl_grads])
        kl_v = (flat_kl_grads * v).sum()
        kl_second_grads = torch.autograd.grad(kl_v,
                                              self.net.actor.parameters())
        flat_kl_second_grads = torch.cat(
            [grad.contiguous().view(-1) for grad in kl_second_grads]).data
        flat_kl_second_grads = flat_kl_second_grads + self.args.damping * v
        return flat_kl_second_grads

    # get the kl divergence between two distributions
    def _get_kl(self, obs, pi_old):
        mean_old, std_old = pi_old
        _, pi = self.net(obs)
        mean, std = pi
        # start to calculate the kl-divergence
        kl = -torch.log(std / std_old) + (
            std.pow(2) + (mean - mean_old).pow(2)) / (2 * std_old.pow(2)) - 0.5
        return kl.sum(1, keepdim=True)

    # get the tensors
    def _get_tensors(self, obs):
        return torch.tensor(obs, dtype=torch.float32).unsqueeze(0)
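
The running_state filter above comes from a ZFilter class that is not part of the snippet. A minimal sketch of what such a running observation normalizer typically does, i.e. a z-score with running mean/variance and clipping (this is an assumption, not the project's actual implementation):

import numpy as np


class ZFilter:
    # running z-score normalization of observations: (x - mean) / std, clipped to [-clip, clip]
    def __init__(self, shape, clip=5.0):
        self.clip = clip
        self.n = 0
        self.mean = np.zeros(shape)
        self.var = np.ones(shape)

    def __call__(self, x):
        x = np.asarray(x, dtype=np.float64)
        # update the running mean and variance (Welford-style)
        self.n += 1
        delta = x - self.mean
        self.mean = self.mean + delta / self.n
        self.var = self.var + (delta * (x - self.mean) - self.var) / self.n
        std = np.sqrt(self.var) + 1e-8
        return np.clip((x - self.mean) / std, -self.clip, self.clip)
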
Example 6
class Agent():
    def __init__(self, gamma, epsilon, lr, n_actions, input_dims,
                 mem_size, batch_size, eps_min=0.01, eps_dec=5e-7,
                 replace=1000, chkpt_dir='tmp/dueling_ddqn'):
        self.gamma = gamma
        self.epsilon = epsilon
        self.lr = lr
        self.n_actions = n_actions
        self.input_dims = input_dims
        self.batch_size = batch_size
        self.eps_min = eps_min
        self.eps_dec = eps_dec
        self.replace_target_cnt = replace
        self.chkpt_dir = chkpt_dir
        self.action_space = [i for i in range(self.n_actions)]
        self.learn_step_counter = 0

        self.memory = ReplayBuffer(mem_size, input_dims, n_actions)

        self.q_eval = Network(self.lr, self.n_actions,
                              input_dims=self.input_dims,
                              name='lunar_lander_dueling_ddqn_q_eval',
                              chkpt_dir=self.chkpt_dir)

        self.q_next = Network(self.lr, self.n_actions,
                              input_dims=self.input_dims,
                              name='lunar_lander_dueling_ddqn_q_next',
                              chkpt_dir=self.chkpt_dir)

    def choose_action(self, observation):
        if np.random.random() > self.epsilon:
            state = torch.tensor([observation],dtype=torch.float).to(self.q_eval.device)
            _, advantage = self.q_eval.forward(state)
            action = torch.argmax(advantage).item()
        else:
            action = np.random.choice(self.action_space)

        return action

    def store_transition(self, state, action, reward, state_, done):
        self.memory.store_transition(state, action, reward, state_, done)

    def replace_target_network(self):
        if self.learn_step_counter % self.replace_target_cnt == 0:
            self.q_next.load_state_dict(self.q_eval.state_dict())

    def decrement_epsilon(self):
        self.epsilon = self.epsilon - self.eps_dec \
                        if self.epsilon > self.eps_min else self.eps_min

    def save_models(self):
        self.q_eval.save_checkpoint()
        self.q_next.save_checkpoint()

    def load_models(self):
        self.q_eval.load_checkpoint()
        self.q_next.load_checkpoint()

    def learn(self):
        if self.memory.mem_cntr < self.batch_size:
            return

        self.q_eval.optimizer.zero_grad()

        self.replace_target_network()

        state, action, reward, new_state, done = \
                                self.memory.sample_buffer(self.batch_size)

        states = torch.tensor(state).to(self.q_eval.device)
        rewards = torch.tensor(reward).to(self.q_eval.device)
        dones = torch.tensor(done).to(self.q_eval.device)
        actions = torch.tensor(action).to(self.q_eval.device)
        states_ = torch.tensor(new_state).to(self.q_eval.device)

        indices = np.arange(self.batch_size)

        V_s, A_s = self.q_eval.forward(states)
        V_s_, A_s_ = self.q_next.forward(states_)

        V_s_eval, A_s_eval = self.q_eval.forward(states_)

        q_pred = torch.add(V_s,
                        (A_s - A_s.mean(dim=1, keepdim=True)))[indices, actions]
        q_next = torch.add(V_s_,
                        (A_s_ - A_s_.mean(dim=1, keepdim=True)))

        q_eval = torch.add(V_s_eval, (A_s_eval - A_s_eval.mean(dim=1,keepdim=True)))

        max_actions = torch.argmax(q_eval, dim=1)

        q_next[dones] = 0.0
        q_target = rewards + self.gamma*q_next[indices, max_actions]

        loss = self.q_eval.loss(q_target, q_pred).to(self.q_eval.device)
        loss.backward()
        self.q_eval.optimizer.step()
        self.learn_step_counter += 1

        self.decrement_epsilon()
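
A small, self-contained illustration (not taken from the source) of the dueling aggregation used in learn() above, Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)):

import torch

V = torch.tensor([[1.0]])                    # value stream, shape (batch, 1)
A = torch.tensor([[0.5, -0.5, 1.0, -1.0]])   # advantage stream, shape (batch, n_actions)
Q = V + (A - A.mean(dim=1, keepdim=True))    # broadcast V over the action dimension
print(Q)                                     # tensor([[1.5000, 0.5000, 2.0000, 0.0000]])

Subtracting the mean advantage keeps the value/advantage decomposition identifiable, which is why q_pred, q_next and q_eval are all built this way.
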
Example 7
def train(train_feats,
          train_caps,
          val_feats,
          val_caps,
          train_prefix="",
          val_prefix="",
          epochs=EPOCHS,
          batch_size=BATCH_SIZE,
          max_seq_len=MAX_LEN,
          hidden_dim=HIDDEN_DIM,
          emb_dim=EMB_DIM,
          enc_seq_len=ENC_SEQ_LEN,
          enc_dim=ENC_DIM,
          clip_val=CLIP_VAL,
          teacher_force=TEACHER_FORCE_RAT,
          dropout_p=0.1,
          attn_activation="relu",
          epsilon=0.0005,
          weight_decay=WEIGHT_DECAY,
          lr=LEARNING_RATE,
          early_stopping=True,
          scheduler="step",
          attention=None,
          deep_out=False,
          checkpoint="",
          out_dir="Pytorch_Exp_Out",
          decoder=None):

    print("EXPERIMENT START ", time.asctime())

    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # 1. Load the data

    train_captions = open(train_caps, mode='r', encoding='utf-8') \
        .read().strip().split('\n')
    train_features = open(train_feats, mode='r').read().strip().split('\n')
    train_features = [os.path.join(train_prefix, z) for z in train_features]

    assert len(train_captions) == len(train_features)

    if val_caps:
        val_captions = open(val_caps, mode='r', encoding='utf-8') \
            .read().strip().split('\n')

        val_features = open(val_feats, mode='r').read().strip().split('\n')
        val_features = [os.path.join(val_prefix, z) for z in val_features]

        assert len(val_captions) == len(val_features)

    # 2. Preprocess the data

    train_captions = normalize_strings(train_captions)
    train_data = list(zip(train_captions, train_features))
    train_data = filter_inputs(train_data)
    print("Total training instances: ", len(train_data))

    if val_caps:
        val_captions = normalize_strings(val_captions)
        val_data = list(zip(val_captions, val_features))
        val_data = filter_inputs(val_data)
        print("Total validation instances: ", len(val_data))

    vocab = Vocab()
    vocab.build_vocab(map(lambda x: x[0], train_data), max_size=10000)
    vocab.save(path=os.path.join(out_dir, 'vocab.txt'))
    print("Vocabulary size: ", vocab.n_words)

    # 3. Initialize the network, optimizer & loss function

    net = Network(hid_dim=hidden_dim,
                  out_dim=vocab.n_words,
                  sos_token=0,
                  eos_token=1,
                  pad_token=2,
                  teacher_forcing_rat=teacher_force,
                  emb_dim=emb_dim,
                  enc_seq_len=enc_seq_len,
                  enc_dim=enc_dim,
                  dropout_p=dropout_p,
                  deep_out=deep_out,
                  decoder=decoder,
                  attention=attention)
    net.to(DEVICE)

    if checkpoint:
        net.load_state_dict(torch.load(checkpoint))

    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=lr,
                                 weight_decay=weight_decay)
    loss_function = nn.NLLLoss()

    scheduler = set_scheduler(scheduler, optimizer)

    # 4. Train

    prev_val_l = sys.maxsize
    total_instances = 0
    total_steps = 0
    train_loss_log = []
    train_loss_log_batches = []
    train_penalty_log = []
    val_loss_log = []
    val_loss_log_batches = []
    val_bleu_log = []
    prev_bleu = 0  # best validation BLEU so far (higher is better)

    train_data = DataLoader(captions=map(lambda x: x[0], train_data),
                            sources=map(lambda x: x[1], train_data),
                            batch_size=batch_size,
                            vocab=vocab,
                            max_seq_len=max_seq_len)

    if val_caps:
        val_data = DataLoader(captions=map(lambda x: x[0], val_data),
                              sources=map(lambda x: x[1], val_data),
                              batch_size=batch_size,
                              vocab=vocab,
                              max_seq_len=max_seq_len,
                              val_multiref=True)

    training_start_time = time.time()

    for e in range(1, epochs + 1):
        print("Epoch ", e)

        tfr = _teacher_force(epochs, e, teacher_force)

        # train one epoch
        train_l, inst, steps, t, l_log, pen = train_epoch(
            model=net,
            loss_function=loss_function,
            optimizer=optimizer,
            data_iter=train_data,
            max_len=max_seq_len,
            clip_val=clip_val,
            epsilon=epsilon,
            teacher_forcing_rat=tfr)

        if scheduler is not None:
            scheduler.step()

        # epoch logs
        print("Training loss:\t", train_l)
        print("Instances:\t", inst)
        print("Steps:\t", steps)
        hours = t // 3600
        mins = (t % 3600) // 60
        secs = (t % 60)
        print("Time:\t{0}:{1}:{2}".format(hours, mins, secs))
        total_instances += inst
        total_steps += steps
        train_loss_log.append(train_l)
        train_loss_log_batches += l_log
        train_penalty_log.append(pen)
        print()

        # evaluate
        if val_caps:
            val_l, l_log, bleu = evaluate(model=net,
                                          loss_function=loss_function,
                                          data_iter=val_data,
                                          max_len=max_seq_len,
                                          epsilon=epsilon)

            # validation logs
            print("Validation loss: ", val_l)
            print("Validation BLEU-4: ", bleu)
            if bleu > prev_bleu:
                torch.save(net.state_dict(), os.path.join(out_dir, 'net.pt'))
            val_loss_log.append(val_l)
            val_bleu_log.append(bleu)
            val_loss_log_batches += l_log

        #sample model
        print("Sampling training data...")
        print()
        samples = sample(net,
                         train_data,
                         vocab,
                         samples=3,
                         max_len=max_seq_len)
        for t, s in samples:
            print("Target:\t", t)
            print("Predicted:\t", s)
            print()

        # if val_caps:
        #     print("Sampling validation data...")
        #     print()
        #     samples = sample(net, val_data, vocab, samples=3, max_len=max_seq_len)
        #     for t, s in samples:
        #         print("Target:\t", t)
        #         print("Predicted:\t", s)
        #         print()

        if val_caps:
            # If the validation BLEU after this epoch dropped from the
            # previous epoch, wrap up training early.
            if prev_bleu > bleu and early_stopping:
                print("\nWrapping training after {0} epochs.\n".format(e))
                break

            prev_val_l = val_l
            prev_bleu = bleu

    # Experiment summary logs.
    tot_time = time.time() - training_start_time
    hours = tot_time // 3600
    mins = (tot_time % 3600) // 60
    secs = (tot_time % 60)
    print("Total training time:\t{0}:{1}:{2}".format(hours, mins, secs))
    print("Total training instances:\t", total_instances)
    print("Total training steps:\t", total_steps)
    print()

    _write_loss_log("train_loss_log.txt", out_dir, train_loss_log)
    _write_loss_log("train_loss_log_batches.txt", out_dir,
                    train_loss_log_batches)
    _write_loss_log("train_penalty.txt", out_dir, train_penalty_log)

    if val_caps:
        _write_loss_log("val_loss_log.txt", out_dir, val_loss_log)
        _write_loss_log("val_loss_log_batches.txt", out_dir,
                        val_loss_log_batches)
        _write_loss_log("val_bleu4_log.txt", out_dir, val_bleu_log)

    print("EXPERIMENT END ", time.asctime())
Example 8
def run(test_dir,
        test_srcs,
        checkpoint,
        vocab,
        out="captions.out.txt",
        batch_size=16,
        max_seq_len=MAX_LEN,
        hidden_dim=HIDDEN_DIM,
        emb_dim=EMB_DIM,
        enc_seq_len=ENC_SEQ_LEN,
        enc_dim=ENC_DIM,
        attn_activation="relu",
        deep_out=False,
        decoder=4,
        attention=3):

    if decoder == 1:
        decoder = models.AttentionDecoder_1
    elif decoder == 2:
        decoder = models.AttentionDecoder_2
    elif decoder == 3:
        decoder = models.AttentionDecoder_3
    elif decoder == 4:
        decoder = models.AttentionDecoder_4

    if attention == 1:
        attention = attentions.AdditiveAttention
    elif attention == 2:
        attention = attentions.GeneralAttention
    elif attention == 3:
        attention = attentions.ScaledGeneralAttention

    # load vocabulary
    vocabulary = Vocab()
    vocabulary.load(vocab)

    # load test instances file paths
    srcs = open(test_srcs).read().strip().split('\n')
    srcs = [os.path.join(test_dir, s) for s in srcs]

    # load model
    net = Network(hid_dim=hidden_dim,
                  out_dim=vocabulary.n_words,
                  sos_token=0,
                  eos_token=1,
                  pad_token=2,
                  emb_dim=emb_dim,
                  enc_seq_len=enc_seq_len,
                  enc_dim=enc_dim,
                  deep_out=deep_out,
                  attention=attention,
                  decoder=decoder)
    net.to(DEVICE)

    net.load_state_dict(torch.load(checkpoint))

    net.eval()

    with torch.no_grad():

        # run inference
        num_instances = len(srcs)
        i = 0
        captions = []
        while i < num_instances:
            srcs_batch = srcs[i:i + batch_size]
            batch = _load_batch(srcs_batch)
            batch = batch.to(DEVICE)

            tokens, _ = net(batch, targets=None, max_len=max_seq_len)
            tokens = tokens.permute(1, 0, 2).detach()
            _, topi = tokens.topk(1, dim=2)
            topi = topi.squeeze(2)

            # decode token output from the model
            for j in range(len(srcs_batch)):
                c = vocabulary.tensor_to_sentence(topi[j])
                c = ' '.join(c)
                captions.append(c)

            i += len(srcs_batch)

    with open(out, mode='w') as out_f:
        for c in captions:
            out_f.write(c + '\n')

    return
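
The _load_batch helper is also project code that is not included. A hypothetical version, assuming each source path points to a saved .npy feature array of shape (enc_seq_len, enc_dim):

import numpy as np
import torch


def _load_batch(paths):
    # hypothetical loader: stack per-image feature files into one float batch tensor
    feats = [np.load(p) for p in paths]  # each assumed to be (enc_seq_len, enc_dim)
    return torch.tensor(np.stack(feats), dtype=torch.float32)
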
Example 9
class trpo_agent:
    def __init__(self, env, args):
        self.env = env
        self.args = args
        # define the network
        self.net = Network(self.env.observation_space.shape[0],
                           self.env.action_space.shape[0])
        self.old_net = Network(self.env.observation_space.shape[0],
                               self.env.action_space.shape[0])
        # make sure the net and old net have the same parameters
        self.old_net.load_state_dict(self.net.state_dict())
        # define the optimizer
        self.optimizer = torch.optim.Adam(self.net.critic.parameters(),
                                          lr=self.args.lr)
        # define the running mean filter
        self.running_state = ZFilter((self.env.observation_space.shape[0], ),
                                     clip=5)
        if not os.path.exists(self.args.save_dir):
            os.mkdir(self.args.save_dir)
        self.model_path = self.args.save_dir + self.args.env_name + '/'
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)

    def learn(self):
        num_updates = self.args.total_timesteps // self.args.nsteps
        obs = self.running_state(self.env.reset())
        final_reward = 0
        episode_reward = 0
        self.dones = False
        for update in range(num_updates):
            mb_obs, mb_rewards, mb_actions, mb_dones, mb_values = [], [], [], [], []
            for step in range(self.args.nsteps):
                with torch.no_grad():
                    obs_tensor = self._get_tensors(obs)
                    value, pi = self.net(obs_tensor)
                # select actions
                actions = select_actions(pi)
                # store information
                mb_obs.append(np.copy(obs))
                mb_actions.append(actions)
                mb_dones.append(self.dones)
                mb_values.append(value.detach().numpy().squeeze())
                # start to execute actions in the environment
                obs_, reward, done, _ = self.env.step(actions)
                self.dones = done
                mb_rewards.append(reward)
                if done:
                    obs_ = self.env.reset()
                obs = self.running_state(obs_)
                episode_reward += reward
                mask = 0.0 if done else 1.0
                final_reward *= mask
                final_reward += (1 - mask) * episode_reward
                episode_reward *= mask
            # to process the rollouts
            mb_obs = np.asarray(mb_obs, dtype=np.float32)
            mb_rewards = np.asarray(mb_rewards, dtype=np.float32)
            mb_actions = np.asarray(mb_actions, dtype=np.float32)
            mb_dones = np.asarray(mb_dones, dtype=bool)
            mb_values = np.asarray(mb_values, dtype=np.float32)
            # compute the last state value
            with torch.no_grad():
                obs_tensor = self._get_tensors(obs)
                last_value, _ = self.net(obs_tensor)
                last_value = last_value.detach().numpy().squeeze()
            # compute the advantages
            mb_returns = np.zeros_like(mb_rewards)
            mb_advs = np.zeros_like(mb_rewards)
            lastgaelam = 0
            for t in reversed(range(self.args.nsteps)):
                if t == self.args.nsteps - 1:
                    nextnonterminal = 1.0 - self.dones
                    nextvalues = last_value
                else:
                    nextnonterminal = 1.0 - mb_dones[t + 1]
                    nextvalues = mb_values[t + 1]
                delta = (mb_rewards[t]
                         + self.args.gamma * nextvalues * nextnonterminal
                         - mb_values[t])
                mb_advs[t] = lastgaelam = (
                    delta
                    + self.args.gamma * self.args.tau * nextnonterminal * lastgaelam)
            mb_returns = mb_advs + mb_values
            # normalize the advantages
            mb_advs = (mb_advs - mb_advs.mean()) / (mb_advs.std() + 1e-5)
            # before the update, make sure the old network has the same parameters as the current network
            self.old_net.load_state_dict(self.net.state_dict())
            # start to update the network
            policy_loss, value_loss = self._update_network(
                mb_obs, mb_actions, mb_returns, mb_advs)
            torch.save([self.net.state_dict(), self.running_state],
                       self.model_path + 'model.pt')
            print('[{}] Update: {} / {}, Frames: {}, Reward: {:.3f}, VL: {:.3f}, PL: {:.3f}'.format(datetime.now(), update, \
                    num_updates, (update + 1)*self.args.nsteps, final_reward, value_loss, policy_loss))

    # start to update network
    def _update_network(self, mb_obs, mb_actions, mb_returns, mb_advs):
        mb_obs_tensor = torch.tensor(mb_obs, dtype=torch.float32)
        mb_actions_tensor = torch.tensor(mb_actions, dtype=torch.float32)
        mb_returns_tensor = torch.tensor(mb_returns,
                                         dtype=torch.float32).unsqueeze(1)
        mb_advs_tensor = torch.tensor(mb_advs,
                                      dtype=torch.float32).unsqueeze(1)
        # try to get the old policy and current policy
        values, _ = self.net(mb_obs_tensor)
        with torch.no_grad():
            _, pi_old = self.old_net(mb_obs_tensor)
        # get the surr loss
        surr_loss = self._get_surrogate_loss(mb_obs_tensor, mb_advs_tensor,
                                             mb_actions_tensor, pi_old)
        # compute the surrogate gradient -> g, Ax = g, where A is the Fisher information matrix
        surr_grad = torch.autograd.grad(surr_loss, self.net.actor.parameters())
        flat_surr_grad = torch.cat([grad.view(-1) for grad in surr_grad]).data
        # use the conjugate gradient method to calculate the scaled direction vector (natural gradient)
        nature_grad = conjugated_gradient(self._fisher_vector_product,
                                          -flat_surr_grad, 10, mb_obs_tensor,
                                          pi_old)
        # calculate the scaling ratio
        non_scale_kl = 0.5 * (nature_grad * self._fisher_vector_product(
            nature_grad, mb_obs_tensor, pi_old)).sum(0, keepdim=True)
        scale_ratio = torch.sqrt(non_scale_kl / self.args.max_kl)
        final_nature_grad = nature_grad / scale_ratio[0]
        # calculate the expected improvement rate...
        expected_improve = (-flat_surr_grad * nature_grad).sum(
            0, keepdim=True) / scale_ratio[0]
        # get the flat param ...
        prev_params = torch.cat(
            [param.data.view(-1) for param in self.net.actor.parameters()])
        # start to do the line search
        success, new_params = line_search(self.net.actor, self._get_surrogate_loss, prev_params, final_nature_grad, \
                                expected_improve, mb_obs_tensor, mb_advs_tensor, mb_actions_tensor, pi_old)
        set_flat_params_to(self.net.actor, new_params)
        # then update the critic network
        inds = np.arange(mb_obs.shape[0])
        for _ in range(self.args.vf_itrs):
            np.random.shuffle(inds)
            for start in range(0, mb_obs.shape[0], self.args.batch_size):
                end = start + self.args.batch_size
                mbinds = inds[start:end]
                mini_obs = mb_obs[mbinds]
                mini_returns = mb_returns[mbinds]
                # put things in the tensor
                mini_obs = torch.tensor(mini_obs, dtype=torch.float32)
                mini_returns = torch.tensor(mini_returns,
                                            dtype=torch.float32).unsqueeze(1)
                values, _ = self.net(mini_obs)
                v_loss = (mini_returns - values).pow(2).mean()
                self.optimizer.zero_grad()
                v_loss.backward()
                self.optimizer.step()
        return surr_loss.item(), v_loss.item()

    # get the surrogate loss
    def _get_surrogate_loss(self, obs, adv, actions, pi_old):
        _, pi = self.net(obs)
        log_prob = eval_actions(pi, actions)
        old_log_prob = eval_actions(pi_old, actions).detach()
        surr_loss = -torch.exp(log_prob - old_log_prob) * adv
        return surr_loss.mean()

    # the product of the Fisher information matrix and the natural gradient -> Ax
    def _fisher_vector_product(self, v, obs, pi_old):
        kl = self._get_kl(obs, pi_old)
        kl = kl.mean()
        # start to calculate the second order gradient of the KL
        kl_grads = torch.autograd.grad(kl,
                                       self.net.actor.parameters(),
                                       create_graph=True)
        flat_kl_grads = torch.cat([grad.view(-1) for grad in kl_grads])
        kl_v = (flat_kl_grads * v).sum()
        kl_second_grads = torch.autograd.grad(kl_v,
                                              self.net.actor.parameters())
        flat_kl_second_grads = torch.cat(
            [grad.contiguous().view(-1) for grad in kl_second_grads]).data
        flat_kl_second_grads = flat_kl_second_grads + self.args.damping * v
        return flat_kl_second_grads

    # get the kl divergence between two distributions
    def _get_kl(self, obs, pi_old):
        mean_old, std_old = pi_old
        _, pi = self.net(obs)
        mean, std = pi
        # start to calculate the kl-divergence
        kl = -torch.log(std / std_old) + (
            std.pow(2) + (mean - mean_old).pow(2)) / (2 * std_old.pow(2)) - 0.5
        return kl.sum(1, keepdim=True)

    # get the tensors
    def _get_tensors(self, obs):
        return torch.tensor(obs, dtype=torch.float32).unsqueeze(0)
        im_size = 32
        epc_seed = 0
        config = Config(input_ch=input_ch,
                        padded_im_size=padded_im_size,
                        num_classes=num_classes,
                        im_size=im_size,
                        epc_seed=epc_seed)
        dataset_sizes = {'train': 5e4, 'test': 1e4}
    if args.model in ['ResNet18', 'DenseNet3_40', 'VGG11_bn']:
        model = Network().construct(args.model, row)
    else:
        raise Exception('Unknown model argument: {}'.format(args.model))

    state_dict = torch.load(model_weights_path,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict['model'], strict=True)
    model = model.to(device)

    model = model.eval()

    mean, std = get_mean_std(args.dataset)
    pad = int((row.padded_im_size - row.im_size) / 2)
    transform = transforms.Compose([
        transforms.Pad(pad),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])

    full_dataset = getattr(datasets, args.dataset)
    subset_dataset = get_subset_dataset(
        full_dataset=full_dataset,
            self.conv_output = x[0, self.selected_filter]
            # Loss function is the mean of the output of the selected layer/filter
            # We try to minimize the mean of the output of that specific filter
            loss = -torch.mean(self.conv_output)
            print('Iteration:', str(i), 'Loss:',
                  "{0:.2f}".format(loss.data.numpy()))
            # Backward
            loss.backward()
            # Update image
            optimizer.step()
            # Recreate image
            self.created_image = recreate_image(processed_image)
            # Save image
            if i % 5 == 0:
                im_path = '../generated/layer_vis_l' + str(self.selected_layer) + \
                          '_f' + str(self.selected_filter) + '_iter' + str(i) + '.jpg'
                save_image(self.created_image, im_path)


if __name__ == '__main__':
    cnn_layer = 1
    filter_pos = 1
    # Fully connected layer is not needed

    pretrained_model = Network(20).to(device)
    pretrained_model.load_state_dict(torch.load('best_model_C.pt'))
    layer_vis = CNNLayerVisualization(pretrained_model, cnn_layer, filter_pos)

    # Layer visualization with pytorch hooks
    layer_vis.visualise_layer_with_hooks()
def get_tensors(x):
    return torch.tensor(x, dtype=torch.float32).unsqueeze(0)


if __name__ == '__main__':
    args = get_args()
    # create the environment
    env = gym.make(args.env_name)
    # build up the network
    net = Network(env.observation_space.shape[0], env.action_space.shape[0])
    # load the saved model
    model_path = args.save_dir + args.env_name + '/model.pt'
    network_model, filters = torch.load(
        model_path, map_location=lambda storage, loc: storage)
    net.load_state_dict(network_model)
    net.eval()
    for _ in range(10):
        obs = denormalize(env.reset(), filters.rs.mean, filters.rs.std)
        reward_total = 0
        for _ in range(10000):
            env.render()
            obs_tensor = get_tensors(obs)
            with torch.no_grad():
                _, (mean, _) = net(obs_tensor)
                action = mean.numpy().squeeze()
            obs, reward, done, _ = env.step(action)
            reward_total += reward
            obs = denormalize(obs, filters.rs.mean, filters.rs.std)
            if done:
                break
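
The denormalize helper in this last demo is not shown either. Judging only from how it is called (on raw observations, together with the saved running mean and std), it presumably standardizes and clips the observation before it is fed to the policy; a guess at such a helper, where the clip value and the exact formula are assumptions:

import numpy as np


def denormalize(x, mean, std, clip=10.0):
    # assumed behaviour: apply the stored running statistics to a raw observation
    x = (np.asarray(x, dtype=np.float64) - mean) / (std + 1e-8)
    return np.clip(x, -clip, clip)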