Example #1
    def evaluate_fixed(self, test_iter, n_cl_sample):

        cum_loss = 0
        cum_prec = 0
        cum_rec = 0
        cum_size = 0

        for t in range(test_iter):

            words, context, target, cluster_iden, ixs = self.generate(
                n_cl_sample)
            input_x = torch.cat([words, context], dim=1)
            kernel = self.kernel_net(input_x)
            vals, vecs = custom_decomp()(kernel)
            subset = DPP()(vals, vecs)
            pick = subset.diag().mm(words).sum(0, keepdim=True)
            self.pred = self.pred_net(pick).squeeze()
            loss = nn.MSELoss()(self.pred, target)

            # Subset Statistics
            precision, recall, set_size = self.assess(subset.data, ixs)
            cum_loss += loss.data[0]
            cum_prec += precision
            cum_rec += recall
            cum_size += set_size

        print('Loss:', cum_loss / test_iter, 'Precision:', cum_prec / test_iter,
              'Recall:', cum_rec / test_iter, 'Set Size:', cum_size / test_iter)
        return (cum_loss / test_iter, cum_prec / test_iter,
                cum_rec / test_iter, cum_size / test_iter)
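
The subset statistics above come from self.assess, which is not shown in this example. A minimal sketch of how precision, recall and set size could be derived from a 0/1 subset indicator and ground-truth indices ixs; the helper name and the exact format of ixs are assumptions:

import torch

def assess_sketch(subset, ixs):
    """Hypothetical helper: subset is a 0/1 vector, ixs holds the true indices."""
    picked = subset.nonzero().view(-1)          # indices chosen by the DPP sample
    truth = torch.as_tensor(ixs).view(-1)       # ground-truth indices
    set_size = picked.numel()
    hits = len(set(picked.tolist()) & set(truth.tolist()))
    precision = hits / max(set_size, 1)
    recall = hits / max(truth.numel(), 1)
    return precision, recall, set_size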
Example #2
    def run(self, words, context, batch_size, alpha_iter):
        """
        This may be used by sample and by evaluate. 
        Samples once from DPP. 
        Can be used with any batch_size. 
        Returns a tensor of many subsets
        """
        set_size = self.set_size
        kernel_in = self.kernel_in
        kernel_out = self.kernel_out

        actions = self.saved_subsets = [[] for _ in range(batch_size)]
        rewards = self.saved_losses = [[] for _ in range(batch_size)]
        cum_loss = 0.

        # Concatenate individual words and set context
        # Dimensions are batch_size x set_size x kernel_in
        batch_x = Variable(torch.cat([words, context], dim = 2))

        # Compute embedding of DPP kernel
        batch_kernel = self.kernel_net(batch_x.view(-1, kernel_in))
        batch_kernel = batch_kernel.view(-1, set_size, kernel_out)

        for i, kernel in enumerate(batch_kernel):
            vals, vecs = custom_decomp()(kernel)
            for j in range(alpha_iter):
                subset = DPP()(vals, vecs)
                actions[i].append(subset)

        subset_tensor = torch.stack([torch.stack(subsets) for subsets in actions])

        return subset_tensor
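
Examples 5 and 7 below rely on the identity that an L-ensemble DPP with kernel eigenvalues λ_i has expected subset size Σ_i λ_i / (1 + λ_i). A self-contained sketch of that quantity, using torch.linalg.eigvalsh in place of the custom_decomp wrapper used throughout these examples (that substitution is an assumption):

import torch

def expected_dpp_size(L):
    """Expected cardinality of an L-ensemble DPP: sum_i lambda_i / (1 + lambda_i)."""
    L = 0.5 * (L + L.t())               # guard against numerical asymmetry
    vals = torch.linalg.eigvalsh(L)     # eigenvalues of the symmetric kernel
    return (vals / (1 + vals)).sum()

# Example: a random PSD kernel over a ground set of 6 items.
X = torch.randn(6, 4)
print(expected_dpp_size(X @ X.t()))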
Example #3
    def sample(self):

        words, context, ixs, target = self.generate()
        input_x = torch.cat([words, context], dim=1)
        kernel = self.kernel_net(input_x)
        vals, vecs = custom_decomp()(kernel)
        subset = DPP()(vals, vecs)
        pick = subset.diag().mm(words).sum(0, keepdim=True)
        self.pred = self.pred_net(pick).squeeze()
        loss = nn.BCELoss()(self.pred, target)

        # Classification Accuracy
        label = torch.round(self.pred.data)
        acc = ((label == target.data).sum() / self.aspects_n)

        # Subset Statistics
        precision, recall, set_size = self.assess(subset.data, ixs)

        # Print
        print('Target is: ', target.data)
        print('Pred is: ', self.pred.data)
        print('Loss is:', loss.data[0])
        print('Acc is:', acc)
        print('Subset is:', subset.data)
        print('Ix is:', ixs)
        print('Subset statistics are:', precision, recall, set_size)

        return words, context, ixs, target, self.pred, loss, subset
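
The accuracy term rounds each sigmoid output of pred_net to a hard label and counts matches against the multi-label target. A stand-alone illustration with made-up values (aspects_n = 4 is a placeholder):

import torch

aspects_n = 4
pred = torch.tensor([0.9, 0.2, 0.6, 0.1])   # sigmoid outputs of pred_net
target = torch.tensor([1., 0., 1., 1.])

label = torch.round(pred)                   # hard 0/1 labels
acc = (label == target).sum().item() / aspects_n
print(acc)                                  # 0.75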
Example #4
    def evaluate(self, test_iter):

        cum_loss = 0
        cum_acc = 0
        cum_prec = 0
        cum_rec = 0
        cum_size = 0

        for t in range(test_iter):

            words, context, ixs, target = self.generate()
            input_x = torch.cat([words, context], dim=1)
            kernel = self.kernel_net(input_x)
            vals, vecs = custom_decomp()(kernel)
            subset = DPP()(vals, vecs)
            pick = subset.diag().mm(words).sum(0, keepdim=True)
            self.pred = self.pred_net(pick).squeeze()
            loss = nn.BCELoss()(self.pred, target)

            # Classification Accuracy
            label = torch.round(self.pred.data)
            cum_acc += ((label == target.data).sum() / self.aspects_n)

            # Subset Statistics
            precision, recall, set_size = self.assess(subset.data, ixs)
            cum_loss += loss.data[0]
            cum_prec += precision
            cum_rec += recall
            cum_size += set_size

        print('Loss:', cum_loss / test_iter, 'Pred Acc:', cum_acc / test_iter,
              'Precision:', cum_prec / test_iter, 'Recall:',
              cum_rec / test_iter, 'Set Size:', cum_size / test_iter)
Example #5
    def __call__(self, kernels, batched_words):

        batch_size, set_size, kernel_dim = kernels.size()
        batch_size, set_size, embd_dim = batched_words.size()
        alpha_iter = self.alpha_iter

        self.exp_sizes = exp_sizes = []
        self.saved_subsets = actions = [[] for i in range(batch_size)]
        self.saved_picks = picks = [[] for i in range(batch_size)]

        for i, (kernel, words) in enumerate(zip(kernels, batched_words)):
            vals, vecs = custom_decomp()(kernel)
            exp_size = (vals / (1 + vals)).sum()
            exp_sizes.append(exp_size)
            for j in range(alpha_iter):
                while True:
                    # Re-sample to avoid empty subsets. This biases the estimator,
                    # so it is only a temporary fix.
                    subset = DPP()(vals, vecs)
                    if subset.data.sum() >= 1:
                        break
                    print("Empty subset sampled; re-sampling.")
                actions[i].append(subset)
                pick = words.masked_select(
                    Variable(subset.data.byte().expand_as(words.t())).t())
                pick = pick.view(-1, embd_dim)
                picks[i].append(pick)

        return picks
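
The pick extraction above builds a boolean mask from the 0/1 subset, masked_selects the surviving entries of words, and reshapes the flattened result back to (-1, embd_dim). The same row selection in isolation, written with boolean indexing (the tensors are made up):

import torch

words = torch.arange(12, dtype=torch.float32).view(4, 3)   # 4 items, embd_dim = 3
subset = torch.tensor([1., 0., 1., 0.])                    # 0/1 DPP sample

pick = words[subset.bool()]   # keep only the selected rows
print(pick.shape)             # torch.Size([2, 3])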
Example #6
    def train(self, train_iter, batch_size, lr, alpha_iter=1, baseline=True):
        """
        Training the model. 
        Doesn't use the forward pass as want to sample repeatedly!
        """
        set_size = self.set_size
        kernel_in = self.kernel_in
        kernel_out = self.kernel_out

        loss_log = 100
        optimizer = optim.Adam(self.kernel_net.parameters(), lr=lr)


        for t in range(train_iter):
            actions = self.saved_subsets = [[] for _ in range(batch_size)]
            rewards = self.saved_losses = [[] for _ in range(batch_size)]

            cum_loss = 0.
            words, context, target = self.generate(batch_size)

            # Concatenate individual words and set context
            # Dimensions are batch_size x set_size x kernel_in
            batch_x = Variable(torch.cat([words, context], dim = 2))

            # Compute embedding of DPP kernel
            batch_kernel = self.kernel_net(batch_x.view(-1, kernel_in))
            batch_kernel = batch_kernel.view(-1, set_size, kernel_out)

            for i, kernel in enumerate(batch_kernel):
                vals, vecs = custom_decomp()(kernel)
                for j in range(alpha_iter):
                    subset = DPP()(vals, vecs)
                    actions[i].append(subset)
                    loss, _, _, _, _ = self._assess(target[i], subset.data)
                    rewards[i].append(loss)
                    cum_loss += loss

            if baseline:
                self.saved_baselines = [compute_baseline(i) for i in self.saved_losses]
            else: 
                self.saved_baselines = self.saved_losses

            # Register the baselines
            for subsets, baselines in zip(self.saved_subsets, self.saved_baselines):
                for action, reward in zip(subsets, baselines):
                    action.reinforce(reward)

            pseudo_loss = torch.stack([torch.stack(subsets) for subsets in self.saved_subsets]).sum()
            pseudo_loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            self.loss_dict[t].append(cum_loss / (batch_size * alpha_iter))

            if not ((t + 1) % loss_log):
                print("Loss at it ", t+1, " is: ", cum_loss / (batch_size * alpha_iter))  
Example #7
    def forward(self, kernels, words):

        # s_ix and e_ix must be set on the instance before calling forward()
        s_ix = self.s_ix
        e_ix = self.e_ix

        self.exp_sizes = []

        assert s_ix is not None and e_ix is not None

        alpha_iter = self.alpha_iter
        batch_size = len(s_ix)
        embd_dim = words.size(1)

        output = []
        lengths = []
        actions = self.saved_subsets = [[] for i in range(batch_size)]

        for i, (s, e) in enumerate(zip(s_ix, e_ix)):
            V = kernels[s:e]
            word = words[s:e]
            vals, vecs = custom_decomp()(
                V)  # check if gradients work for non-square matrix
            exp_size = (vals / (1 + vals)).pow(2).sum()
            for j in range(alpha_iter):
                while True:
                    # Re-sample to avoid empty subsets. This biases the estimator,
                    # so it is only a temporary fix.
                    # subset = AllInOne()(V)  # scrap this after gradient check!
                    subset = DPP()(vals, vecs)
                    if subset.data.sum() >= 1:
                        break
                    print("Empty subset sampled; re-sampling.")
                actions[i].append(subset)
                # The product zeroes out unselected rows but keeps the sampling
                # and prediction graphs connected.
                pick = subset.diag().mm(word)
                pick = pick.masked_select(
                    Variable(subset.data.byte().expand_as(pick.t())).t())
                pick = pick.view(-1, embd_dim)

                output.append(pick)
                lengths.append(pick.size(0))
            self.exp_sizes.append(exp_size)

        output = torch.cat(output, dim=0)
        cum_lengths = list(accumulate(lengths))

        self.s_ix = [ix1 - ix2 for (ix1, ix2) in zip(cum_lengths, lengths)]
        self.e_ix = cum_lengths

        return output
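
The final lines turn the per-subset lengths into start/end offsets into the concatenated output, so a downstream module can slice out its own segments. The same bookkeeping in isolation:

from itertools import accumulate

lengths = [3, 1, 4]                                    # rows contributed by each pick
cum_lengths = list(accumulate(lengths))                # [3, 4, 8]
s_ix = [c - l for c, l in zip(cum_lengths, lengths)]   # [0, 3, 4]
e_ix = cum_lengths                                     # [3, 4, 8]
print(list(zip(s_ix, e_ix)))                           # [(0, 3), (3, 4), (4, 8)]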
Example #8
    def sample(self):

        words, context, target, cluster_iden, ixs = self.generate()
        input_x = torch.cat([words, context], dim=1)
        kernel = self.kernel_net(input_x)
        vals, vecs = custom_decomp()(kernel)
        subset = DPP()(vals, vecs)
        pick = subset.diag().mm(words).sum(0, keepdim=True)
        self.pred = self.pred_net(pick).squeeze()
        loss = nn.MSELoss()(self.pred, target)

        # Subset Statistics
        precision, recall, set_size = self.assess(subset.data, ixs)

        # Print
        print('Target is: ', target.data)
        print('Pred is: ', self.pred.data)
        print('Loss is:', loss.data[0])
        print('Subset is:', subset.data)
        print('Ix is:', ixs)
        print('Subset statistics are:', precision, recall, set_size)
Example #9
    def evaluate(self, test_iter):

        set_size = self.set_size
        n_clusters = self.n_clusters
        kernel_in = self.kernel_in
        kernel_out = self.kernel_out
        embd_dim = self.pred_in
        dtype = self.dtype
        criterion = self.criterion
        
        cum_loss = 0.
        cum_prec = 0.
        cum_rec = 0.
        cum_ssize = 0.

        n_missed = 0.
        n_one = 0
        n_many = 0.
        n_perfect = 0.

        mean = 0.
        temp = 0.
        var = 0.

        batch_size = 1

        for t in range(test_iter):
            picks = [[] for i in range(batch_size)]
            words, context, target, index = self.generate(batch_size)

            # Concatenate individual words and set context
            # Dimensions are batch_size x set_size x kernel_in
            batch_x = Variable(torch.cat([words, context], dim = 2))

            # Compute embedding of DPP kernel
            batch_kernel = self.kernel_net(batch_x.view(-1, kernel_in))
            batch_kernel = batch_kernel.view(-1, set_size, kernel_out)

            for i, kernel in enumerate(batch_kernel):
                vals, vecs = custom_decomp()(kernel)
                subset = DPP()(vals, vecs)
                pick = subset.diag().mm(Variable(words[i])).sum(0, keepdim=True) 
                picks[i].append(pick)

                _, prec, rec, ssize  = self._big_assess(index[i], subset.data)
                missed, one, many, perfect = self._eval_assess(index, subset.data)
                cum_prec += prec
                cum_rec += rec
                cum_ssize += ssize
                n_missed += missed
                n_one += one
                n_many += many
                n_perfect += perfect 

            picks = torch.stack([torch.stack(pick) for pick in picks]).view(-1, embd_dim)
            preds = self.pred_net(picks).view(1, -1)

            targets = target.unsqueeze(0).expand_as(preds)
            loss = criterion(preds, Variable(targets, volatile=True))
            cum_loss += loss.data[0]

            delta = ssize - mean
            mean += delta / (t+1)
            delta2 = ssize - mean
            temp += delta * delta2
        
            var = temp / test_iter

        print("Average Subset Size: ", mean)
        print("Subset Variance: ", var)
        print("Average Loss", cum_loss / test_iter)
        print("n_missed share", n_missed / (test_iter * self.n_clusters))
        print("n_one share", n_one / (test_iter * self.n_clusters))
        print("n_many share", n_many / (test_iter * self.n_clusters))
        print("n_perfect share", n_perfect/ test_iter)
Example #10
            cum_ssize = 0.
            words, context, target, index = self.generate(batch_size)

            # Concatenate individual words and set context
            # Dimensions are batch_size x set_size x kernel_in
            batch_x = Variable(torch.cat([words, context], dim = 2))

            # Compute embedding of DPP kernel
            batch_kernel = self.kernel_net(batch_x.view(-1, kernel_in))
            batch_kernel = batch_kernel.view(-1, set_size, kernel_out)

            if reg:
                reg_loss = 0

            for i, kernel in enumerate(batch_kernel):
                vals, vecs = custom_decomp()(kernel)
                for j in range(alpha_iter):
                    subset = DPP()(vals, vecs)
                    pick = subset.diag().mm(Variable(words[i])).sum(0, keepdim=True)
                    actions[i].append(subset)
                    picks[i].append(pick)
                    _, prec, rec, ssize  = self._big_assess(index[i], subset.data)
                    cum_prec += prec
                    cum_rec += rec
                    cum_ssize += ssize

                if reg:
                    exp_ssize = (vals / (1 + vals)).sum()
                    reg_loss += reg * (exp_ssize - reg_mean)**2

            picks = torch.stack([torch.stack(pick) for pick in picks]).view(-1, embd_dim)
Example #11
    def train(self,
              train_steps,
              batch_size=1,
              sample_iter=1,
              lr=1e-3,
              baseline=False,
              reg=0,
              reg_mean=0):

        if baseline:
            assert sample_iter > 1

        params = [{
            'params': self.kernel_net.parameters()
        }, {
            'params': self.pred_net.parameters()
        }]
        optimizer = optim.Adam(params, lr=lr)

        train_iter = train_steps * batch_size

        cum_loss = 0
        cum_prec = 0
        cum_rec = 0
        cum_size = 0

        for t in range(train_iter):

            actions = self.saved_subsets = []
            rewards = self.saved_losses = []
            picks = []

            words, context, ixs, target = self.generate()
            input_x = torch.cat([words, context], dim=1)
            kernel = self.kernel_net(input_x)
            vals, vecs = custom_decomp()(kernel)

            pred_loss = 0

            for j in range(sample_iter):

                subset = DPP()(vals, vecs)
                actions.append(subset)
                pick = subset.diag().mm(words).sum(0, keepdim=True)
                self.pred = self.pred_net(pick).squeeze()
                loss = nn.BCELoss()(self.pred, target)
                rewards.append(loss.data[0])
                pred_loss += loss

                # For the statistics
                precision, recall, set_size = self.assess(subset.data, ixs)
                cum_loss += loss.data[0]
                cum_prec += precision
                cum_rec += recall
                cum_size += set_size

            # Compute baselines
            if baseline:
                self.saved_baselines = compute_baseline(self.saved_losses)
            else:
                self.saved_baselines = self.saved_losses

            # Register rewards
            for action, reward in zip(self.saved_subsets,
                                      self.saved_baselines):
                action.reinforce(reward)

            # Apply Regularization
            total_loss = pred_loss

            if reg:
                card = (vals / (1 + vals)).sum()
                reg_loss = sample_iter * reg * ((card - reg_mean)**2)

                total_loss += reg_loss

            total_loss.backward()

            if not ((t + 1) % batch_size):
                optimizer.step()
                optimizer.zero_grad()

                if not ((t + 1) % (batch_size * 100)):
                    print(cum_loss / (batch_size * sample_iter))

                self.loss_dict[self.counter].append(cum_loss /
                                                    (batch_size * sample_iter))
                self.prec_dict[self.counter].append(cum_prec /
                                                    (batch_size * sample_iter))
                self.rec_dict[self.counter].append(cum_rec /
                                                   (batch_size * sample_iter))
                self.ssize_dict[self.counter].append(
                    cum_size / (batch_size * sample_iter))

                self.counter += 1

                cum_loss = 0
                cum_prec = 0
                cum_rec = 0
                cum_size = 0
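
The regulariser in the last example penalises the squared deviation of the expected subset size Σ_i λ_i / (1 + λ_i) from the target reg_mean, scaled by reg and by the number of samples drawn per input. In isolation, with placeholder eigenvalues and hyper-parameters:

import torch

vals = torch.tensor([2.0, 0.5, 0.1])        # eigenvalues of the DPP kernel
reg, reg_mean, sample_iter = 0.1, 1.0, 4    # placeholder hyper-parameters

card = (vals / (1 + vals)).sum()            # expected subset size
reg_loss = sample_iter * reg * (card - reg_mean) ** 2
print(card.item(), reg_loss.item())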