Example #1
    def forward(self, data: BatchHolder):

        if self.use_attention:
            output = data.hidden  #(B, L, H)
            mask = data.masks     #(B, L)
            attn = self.attention(data)  #(B, L)

            if self.use_regulariser_attention:
                data.reg_loss = 5 * self.regularizer_attention.regularise(
                    data.seq, output, mask, attn)

            # Optionally stop gradients from flowing through the attention weights.
            if isTrue(data, 'detach'):
                attn = attn.detach()

            # Optionally shuffle the attention weights across positions.
            if isTrue(data, 'permute'):
                permutation = data.generate_permutation()
                attn = torch.gather(attn, -1,
                                    torch.LongTensor(permutation).to(device))

            # Attention-weighted sum over the sequence dimension. (B, H)
            context = (attn.unsqueeze(-1) * output).sum(1)
            data.attn = attn
        else:
            context = data.last_hidden

        predict = self.decode(context)
        data.predict = predict
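
For reference, the `context` computed in the attention branch above is just an attention-weighted sum over the time dimension. A minimal standalone sketch (shapes B, L, H chosen arbitrarily; not part of the original model) showing that the broadcast-and-sum is equivalent to a batched matrix product:

import torch

B, L, H = 2, 5, 4                                  # illustrative sizes only
attn = torch.softmax(torch.randn(B, L), dim=-1)    # (B, L) attention weights
output = torch.randn(B, L, H)                      # (B, L, H) encoder states

context = (attn.unsqueeze(-1) * output).sum(1)     # broadcast-and-sum, (B, H)
context_bmm = torch.bmm(attn.unsqueeze(1), output).squeeze(1)  # same result via bmm

assert torch.allclose(context, context_bmm, atol=1e-6)
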
Example #2
    def get_output_from_logodds(self, data: BatchHolder):
        attn_logodds = data.attn_logodds  #(B, L)
        attn = masked_softmax(attn_logodds, data.masks)

        data.attn_volatile = attn
        data.hidden_volatile = data.hidden

        self.get_output(data)
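
`masked_softmax` is defined elsewhere in the codebase and is not shown here. A rough sketch of what such a helper typically does, assuming `masks` is 1 at padded positions (that convention is an assumption, not confirmed by the snippet):

import torch

def masked_softmax_sketch(logits, masks):
    # Assign -inf to positions assumed to be padding (masks == 1) so they get
    # zero probability after the softmax over the sequence dimension.
    logits = logits.masked_fill(masks.bool(), float('-inf'))
    return torch.softmax(logits, dim=-1)
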
Example #3
    def get_context(self, data: BatchHolder):
        output = data.hidden
        mask = data.masks
        attn = self.attention(data.seq, output, mask)
        if self.use_regulariser_attention:
            data.reg_loss = 5 * self.regularizer_attention.regularise(data.seq, output, mask, attn)

        if isTrue(data, 'detach'):
            attn = attn.detach()

        if isTrue(data, 'permute'):
            permutation = data.generate_permutation()
            attn = torch.gather(attn, -1, torch.LongTensor(permutation).to(device))
        return (attn.unsqueeze(-1) * output).sum(1)
Example #4
    def get_output(self, data: BatchHolder):
        output = data.hidden_volatile  #(B, L, H)
        attn = data.attn_volatile      #(B, *, L)

        if len(attn.shape) == 3:
            # Several attention distributions per example: (B, *, L) x (B, L, H) -> (B, *, H)
            context = (attn.unsqueeze(-1) * output.unsqueeze(1)).sum(2)
        else:
            # Single attention distribution per example: (B, L) x (B, L, H) -> (B, H)
            context = (attn.unsqueeze(-1) * output).sum(1)
        predict = self.decode(context)

        data.predict_volatile = predict
Example #5
def train_dataset_and_temp_scale(dataset, encoders):
    for e in encoders:
        config = configurations[e](dataset)
        trainer = Trainer(dataset, config=config, _type=dataset.trainer_type)
        trainer.train(dataset.train_data,
                      dataset.dev_data,
                      n_iters=8,
                      save_on_metric=dataset.save_on_metric)
        evaluator = Evaluator(dataset,
                              trainer.model.dirname,
                              _type=dataset.trainer_type)

        print("Temperature-scaling..")
        orig_model = evaluator.model
        dev_x_tensor = BatchHolder(dataset.dev_data.X).seq
        dev_x_tensor_lengths = BatchHolder(dataset.dev_data.X).lengths
        dev_x_tensor_masks = BatchHolder(dataset.dev_data.X).masks
        valid_dataset = TensorDataset(
            dev_x_tensor, dev_x_tensor_lengths, dev_x_tensor_masks,
            torch.from_numpy(np.array(dataset.dev_data.y)))
        valid_loader = DataLoader(valid_dataset, batch_size=1)

        scaled_model = ModelWithTemperature(orig_model)
        scaled_model.set_temperature(valid_loader)
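
`ModelWithTemperature` is also defined outside these snippets. As a hedged sketch of the underlying technique only (the class and method names below are hypothetical, and it fits the temperature on precomputed logits rather than on a DataLoader), temperature scaling learns a single scalar divisor for the logits by minimizing NLL on a validation set:

import torch
import torch.nn as nn

class TemperatureScalerSketch(nn.Module):
    """Hypothetical minimal temperature-scaling wrapper (not the repo's ModelWithTemperature)."""

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.temperature = nn.Parameter(torch.ones(1))

    def forward(self, *inputs):
        # Scale the wrapped model's logits by the learned temperature.
        return self.model(*inputs) / self.temperature

    def fit_temperature(self, logits, labels):
        """Fit T by minimizing NLL on precomputed, detached validation logits.

        logits: (N, C) float tensor, labels: (N,) long tensor.
        """
        nll = nn.CrossEntropyLoss()
        optimizer = torch.optim.LBFGS([self.temperature], lr=0.01, max_iter=50)

        def closure():
            optimizer.zero_grad()
            loss = nll(logits / self.temperature, labels)
            loss.backward()
            return loss

        optimizer.step(closure)
        return self.temperature.item()
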
Example #6
    def get_attention(self, data: BatchHolder):
        output = data.hidden_volatile
        mask = data.masks
        attn = self.attention(data)
        data.attn_volatile = attn