Example #1
    def loss(self, span_labels, seq_scores, seq_mask, span_map):

        loss = []
        for k, scorer in self.scorers.items():
            ls = scorer.loss(span_labels[k], seq_scores[k], seq_mask, span_map)
            loss.append(ls)

        loss = aggregate(torch.stack(loss), self.loss_reduction)

        return loss
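
All of these loss methods call an aggregate helper that is not shown in this section. A minimal sketch, assuming it reduces a stacked tensor of per-component losses according to a reduction name:

import torch

def aggregate(losses, reduction):
    # Reduce a stacked 1-D tensor of per-component losses.
    # Assumption: `reduction` mirrors the reduction names used by
    # the torch loss functions ('sum' or 'mean').
    if reduction == 'mean':
        return losses.mean()
    return losses.sum()
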
Example #2
    def loss(self, labels, scores):

        loss = []
        for k, classifier in self.classifiers.items():
            k = self.from_key(k)
            ls = classifier.loss(labels[k], scores[k])
            loss.append(ls)

        loss = aggregate(torch.stack(loss), self.loss_reduction)

        return loss
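
Examples #2 and #3 also call self.from_key, which is not defined here. A plausible sketch, assuming tuple keys are serialized to strings because nn.ModuleDict only accepts string keys (the separator is an assumption):

    def from_key(self, k):
        # Hypothetical: recover the tuple key used in the label/score
        # dicts from the string key stored in the ModuleDict.
        return tuple(k.split('-'))
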
Example #3
    def loss(self, labels, top_scores, top_mask, top_indices):

        loss = []
        for k, scorer in self.scorers.items():
            key = self.from_key(k)
            ls = scorer.loss(labels[key], top_scores[key], top_mask, top_indices)
            loss.append(ls)

        loss = aggregate(torch.stack(loss), self.loss_reduction)

        return loss
Example #4
    def loss(self, labels, scores, mask):

        loss = []
        for k, scorer in self.scorers.items():
            ls = scorer.loss(labels[k], scores[k], mask)
            loss.append(ls)

            # Diagnostic counts (currently unused; apparent debugging leftovers)
            _, pred = scores[k].max(-1)
            true = (labels[k] > 0).sum().item()
            pos = (pred > 0).sum().item()

        loss = aggregate(torch.stack(loss), self.loss_reduction)

        return loss
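
The scorer objects themselves are external to these snippets. A self-contained stand-in illustrating the loss(labels, scores, mask) interface that Example #4 assumes (shapes and masking scheme are assumptions):

import torch.nn as nn
import torch.nn.functional as F

class TokenScorer(nn.Module):
    # Minimal stand-in for a per-key scorer. Assumes scores of shape
    # (batch, seq_len, num_tags), labels of shape (batch, seq_len),
    # and a boolean (or 0/1) mask of shape (batch, seq_len).
    def loss(self, labels, scores, mask):
        # Keep only valid positions, then apply token-level cross entropy.
        mask = mask.bool()
        return F.cross_entropy(scores[mask], labels[mask])
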
Example #5
    def loss(self,
             doc_labels,
             doc_scores,
             sent_labels=None,
             sent_scores=None,
             as_dict=False):

        doc_loss = OrderedDict()
        sent_loss = OrderedDict()
        for k, classifier in self.classifiers.items():

            dl, sl = classifier.loss(
                doc_labels=doc_labels[k],
                doc_scores=doc_scores[k],
                sent_labels=None if sent_labels is None else sent_labels[k],
                sent_scores=None if sent_scores is None else sent_scores[k])

            doc_loss[k] = dl
            sent_loss[k] = sl

        if as_dict:
            return (doc_loss, sent_loss)

        else:
            doc_loss = list(doc_loss.values())
            doc_loss = aggregate(torch.stack(doc_loss), self.loss_reduction)

            if self.use_sent_objective:
                sent_loss = list(sent_loss.values())
                sent_loss = aggregate(torch.stack(sent_loss),
                                      self.loss_reduction)

            else:
                sent_loss = None

        return (doc_loss, sent_loss)
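
The as_dict flag changes what Example #5 returns: per-key OrderedDicts for logging versus aggregated scalar tensors for backprop. A hypothetical call illustrating both modes:

# Per-key OrderedDicts, e.g. for detailed logging:
doc_loss, sent_loss = model.loss(doc_labels, doc_scores,
                                 sent_labels, sent_scores, as_dict=True)

# Aggregated scalar tensors, ready for backprop:
doc_loss, sent_loss = model.loss(doc_labels, doc_scores,
                                 sent_labels, sent_scores)
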
Example #6
    def loss(self, y_true, y_pred, span_map=None):

        span_loss, role_loss = self.relation_extractor.loss(
            span_labels=y_true['span_labels'],
            span_scores=y_pred['span_scores'],
            span_mask=y_true['span_mask'],
            role_labels=y_true['role_labels'],
            top_role_scores=y_pred['top_role_scores'],
            top_span_mask=y_pred['top_span_mask'],
            top_indices=y_pred['top_indices'])

        loss_dict = OrderedDict()
        loss_dict["span_loss"] = span_loss
        loss_dict["role_loss"] = role_loss

        loss = torch.stack(list(loss_dict.values()))
        loss = aggregate(loss, self.loss_reduction)

        return (loss, loss_dict)
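
A hypothetical training-step snippet showing how the (loss, loss_dict) return is typically consumed: the aggregated loss drives backprop while the dict feeds per-component logging.

import logging

loss, loss_dict = model.loss(y_true, y_pred)
loss.backward()
for name, value in loss_dict.items():
    logging.info("%s=%.4f", name, value.item())
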
Example #7
    def loss(self, y_true, y_pred, span_map=None):

        loss_dict = OrderedDict()

        if self.use_doc_classifier:

            doc_loss, sent_loss = self.doc_classifier.loss(
                doc_labels=y_true["doc_labels"],
                doc_scores=y_pred["doc_scores"],
                sent_labels=y_true["sent_labels"],
                sent_scores=y_pred["sent_scores"],
                as_dict=True)

            for k, v in doc_loss.items():
                loss_dict[f"doc_{k}"] = v

            for k, v in sent_loss.items():
                loss_dict[f"sent_{k}"] = v


        if self.use_span_classifier:

            span_labels = nest_dict(y_true["span_labels"])
            span_scores = y_pred["span_scores"]
            span_mask = y_true["span_mask"]
            seq_mask = y_true["seq_mask"]
            seq_scores = y_pred["seq_scores"]
            role_labels = nest_dict(y_true["role_labels"])
            top_role_scores = y_pred["top_role_scores"]
            top_span_mask = y_pred["top_span_mask"]
            top_indices = y_pred["top_indices"]

            span_loss = []
            role_loss = []

            for i in range(len(span_labels)):
                span_ls, role_ls = self.relation_extractor.loss(
                    span_labels=span_labels[i],
                    span_scores=span_scores[i],
                    span_mask=span_mask[i],
                    role_labels=role_labels[i],
                    top_role_scores=top_role_scores[i],
                    top_span_mask=top_span_mask[i],
                    top_indices=top_indices[i],
                    seq_scores=seq_scores[i],
                    seq_mask=seq_mask[i],
                    span_map=span_map)
                span_loss.append(span_ls)
                role_loss.append(role_ls)

            span_loss = aggregate(torch.stack(span_loss), self.loss_reduction)
            role_loss = aggregate(torch.stack(role_loss), self.loss_reduction)

            loss_dict["span"] = span_loss
            loss_dict["role"] = role_loss


        loss_dict = OrderedDict([(k, v) for k, v in loss_dict.items() if v is not None])
        loss = torch.stack(list(loss_dict.values()))
        loss = aggregate(loss, self.loss_reduction)
        return (loss, loss_dict)
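
nest_dict is not defined in this section, but its usage above (indexed with i, measured with len) suggests it converts a dict of per-document values into a list of per-document dicts. A sketch under that assumption:

def nest_dict(d):
    # Assumed behavior: {key: [v0, v1, ...]} -> [{key: v0}, {key: v1}, ...],
    # so the loss can be computed one document at a time.
    n = len(next(iter(d.values())))
    return [{k: v[i] for k, v in d.items()} for i in range(n)]
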
Example #8
    def fit(self, X, y, device=None, path=None, shuffle=True):
        '''
        Parameters
        ----------
        X: documents as a list of strings, one string per document
        y: labels as a list of dictionaries, one dictionary per document
        '''

        logging.info('')
        logging.info('=' * 72)
        logging.info("Fit")
        logging.info('=' * 72)

        # Get/set device
        set_model_device(self, device)

        # Configure training mode
        self.train()

        # Create data set
        dataset = self.dataset_class(
            X=X,
            y=y,
            pretrained=self.pretrained,
            device=device,
            doc_definition=self.doc_definition,
            sent_definition=self.sent_definition,
            max_length=self.max_length,
            max_sent_count=self.max_sent_count,
            linebreak_bound=self.linebreak_bound,
            keep_ws=self.keep_ws)

        # Create data loader
        dataloader = DataLoader(dataset,
                                shuffle=shuffle,
                                batch_size=self.batch_size)

        # Create optimizer
        '''
        Discriminative learning rates for pretrained vs. new parameters,
        per https://github.com/huggingface/transformers/issues/657:

        pretrained = model.bert.parameters()
        # Get names of pretrained parameters (including `bert.` prefix)
        pretrained_names = [f'bert.{k}' for (k, v) in model.bert.named_parameters()]

        new_params = [v for k, v in model.named_parameters() if k not in pretrained_names]

        optimizer = AdamW(
            [{'params': pretrained}, {'params': new_params, 'lr': learning_rate * 10}],
            lr=learning_rate,
        )
        '''

        if self.lr_ratio == 1:
            optimizer = AdamW(self.parameters(), lr=self.lr)
        else:
            pretrained = self.bert.parameters()
            pretrained_names = [
                f'bert.{k}' for (k, v) in self.bert.named_parameters()
            ]
            new_params = [
                v for k, v in self.named_parameters()
                if k not in pretrained_names
            ]
            optimizer = AdamW(
                [{'params': pretrained},
                 {'params': new_params, 'lr': self.lr * self.lr_ratio}],
                lr=self.lr)
        # Cross entropy is computed inline below via F.cross_entropy
        # (previously: cross_entropy = nn.NLLLoss(reduction=self.loss_reduction))

        # Create loss plotter
        plotter = PlotLoss(path=path)

        # Create prf aggregator
        prf_agg = PRFAggregator()

        # Loop on epochs
        pbar = tqdm(total=self.num_epochs)
        for j in range(self.num_epochs):

            loss_epoch = 0
            losses_epoch = OrderedDict()
            prf = []

            # Loop on mini-batches
            for i, (input_ids, attention_mask, doc_labels,
                    sent_labels) in enumerate(dataloader):

                verbose = False  # set to (i == 0) and (j == 0) for verbose output on the first batch only

                # Reset gradients
                self.zero_grad()

                doc_scores, sent_scores = self(input_ids,
                                               attention_mask,
                                               verbose=verbose)

                loss_dict = OrderedDict()
                for k in doc_labels:
                    loss_dict[f"doc_{k[0:3]}"] = F.cross_entropy( \
                                                    input = doc_scores[k],
                                                    target = doc_labels[k],
                                                    reduction = self.loss_reduction)

                if self.use_sent_objective:
                    for k in doc_labels:
                        ls = []
                        for t in sent_labels[k]:
                            scores = sent_scores[k][t]
                            labels = sent_labels[k][t]

                            doc_count, sent_count, _ = tuple(scores.shape)

                            scores = scores.view(doc_count * sent_count, -1)
                            labels = labels.view(doc_count * sent_count)

                            sent_l = F.cross_entropy(
                                input=scores,
                                target=labels,
                                reduction=self.loss_reduction)
                            ls.append(sent_l)
                        ls = aggregate(torch.stack(ls), self.loss_reduction)
                        loss_dict[f"sent_{k[0:3]}"] = ls

                loss = [v for k, v in loss_dict.items() if v is not None]
                loss = aggregate(torch.stack(loss), self.loss_reduction)

                plotter.update_batch(loss, loss_dict)

                #prf_agg.update_counts(self.perf_counts(y_true, y_pred))

                # Backprop loss
                loss.backward()

                loss_epoch += loss.item()
                for k, v in loss_dict.items():
                    if i == 0:
                        losses_epoch[k] = v.item()
                    else:
                        losses_epoch[k] += v.item()

                # Clip gradients (not the loss) to stabilize training
                clip_grad_norm_(self.parameters(), self.grad_max_norm)

                # Update
                optimizer.step()

            plotter.update_epoch(loss_epoch, losses_epoch)

            msg = []
            msg.append('epoch={}'.format(j))
            msg.append('{}={:.1e}'.format('Total', loss_epoch))
            for k, ls in losses_epoch.items():
                msg.append('{}={:.1e}'.format(k, ls))

            #msg.append(prf_agg.prf())
            #prf_agg.reset()

            msg = ", ".join(msg)
            pbar.set_description(desc=msg)
            pbar.update()

        pbar.close()

        return True
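
A hypothetical invocation matching the docstring, where X is a list of document strings and y a parallel list of label dictionaries (all names are illustrative):

model.fit(X=documents, y=labels, device='cuda:0', path='output/')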