Example no. 1
    def __init__(self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        seq2vec_encoder: Seq2VecEncoder,
        dropout: float = 0.,
        label_namespace: str = 'labels',
        initializer: InitializerApplicator = InitializerApplicator(),
        pretrained_path: Optional[str] = None) -> None:
        super().__init__(vocab)

        self._text_field_embedder = text_field_embedder
        self._seq2vec_encoder = seq2vec_encoder
        self._classifier_input_dim = self._seq2vec_encoder.get_output_dim()

        if dropout:
            self._dropout = nn.Dropout(dropout)
        else:
            self._dropout = lambda x: x

        self._num_labels = vocab.get_vocab_size(namespace=label_namespace)
        self._ln = nn.LayerNorm(self._classifier_input_dim)
        self._classification_layer = nn.Linear(self._classifier_input_dim, self._num_labels)
        self._accuracy = CategoricalAccuracy()
        self._loss = nn.CrossEntropyLoss()

        initializer(self)

        if pretrained_path:
            with open(pretrained_path, 'rb') as f:
                self.load_state_dict(torch.load(f))
Example no. 2
    def evaluate_predictor(self):
        self.from_pretrained()

        eval_data = self.downsample()
        metric = CategoricalAccuracy()
        batch_size = 32

        bar = tqdm(range(0, len(eval_data), batch_size))
        for bid in bar:
            instances = [
                eval_data[i]
                for i in range(bid, min(bid + batch_size, len(eval_data)))
            ]
            outputs = self.predictor.predict_batch_instance(instances)
            preds, labels = [], []
            for inst, outp in zip(instances, outputs):
                preds.append([outp['probs']])
                label_idx = inst.fields['label'].label
                if isinstance(inst.fields['label'].label, str):
                    label_idx = self.vocab.get_token_index(label_idx, 'labels')
                labels.append([label_idx])
            metric(predictions=torch.tensor(preds),
                   gold_labels=torch.tensor(labels))
            bar.set_description("{:5.2f}".format(metric.get_metric()))
        print(f"Evaluate on {self.config.data_split}, the result is ",
              metric.get_metric())
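
The metric update above feeds CategoricalAccuracy tensors with an extra singleton dimension, which the metric accepts as long as the predictions carry one more dimension than the gold labels. A minimal standalone check (the probabilities and labels are made up, three classes):

import torch
from allennlp.training.metrics import CategoricalAccuracy

metric = CategoricalAccuracy()
# Shapes mirror the loop above: (batch, 1, num_classes) and (batch, 1).
preds = torch.tensor([[[0.1, 0.7, 0.2]],
                      [[0.6, 0.3, 0.1]]])
labels = torch.tensor([[1], [2]])
metric(predictions=preds, gold_labels=labels)
print(metric.get_metric())  # 0.5 -> first instance correct, second wrong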
Example no. 3
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 vocab: Vocabulary) -> None:
        super().__init__(vocab)
        self.word_embeddings = word_embeddings
        self.encoder = encoder
        self.hidden2tag = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                          out_features=vocab.get_vocab_size('labels'))
        self.accuracy = CategoricalAccuracy()
        self.loss_function = torch.nn.CrossEntropyLoss()

    def __init__(self, vocab: Vocabulary, embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder):
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder
        num_labels = vocab.get_vocab_size("labels")
        self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)
        self.accuracy = CategoricalAccuracy()
        self.macrof1 = FBetaMeasure(average='macro')
        self.microf1 = FBetaMeasure(average='micro')
        self.weightedf1 = FBetaMeasure(average='weighted')
Example no. 5
class LstmClassifier(Model):
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 vocab: Vocabulary) -> None:
        super().__init__(vocab)
        self.word_embeddings = word_embeddings
        self.encoder = encoder
        self.hidden2tag = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                          out_features=vocab.get_vocab_size('labels'))
        self.accuracy = CategoricalAccuracy()
        self.loss_function = torch.nn.CrossEntropyLoss()

    def forward(self,
                tokens: Dict[str, torch.Tensor],
                label: torch.Tensor = None) -> Dict[str, torch.Tensor]:
        mask = get_text_field_mask(tokens)
        embeddings = self.word_embeddings(tokens)
        encoder_out = self.encoder(embeddings, mask)
        logits = self.hidden2tag(encoder_out)

        output = {"logits": logits}
        if label is not None:
            self.accuracy(logits, label)
            output["loss"] = self.loss_function(logits, label)

        return output

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        return {"accuracy": self.accuracy.get_metric(reset)}
Example no. 6
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 pos_weight: float,
                 use_features: bool,
                 use_features_post_encoding: bool = False) -> None:
        super().__init__(vocab)
        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.use_features = use_features
        self.use_features_post_encoding = use_features_post_encoding
        self.pos_weight = pos_weight
        self.accuracy = CategoricalAccuracy()
        self.f1 = F1Measure(1)

        linear_in_features = encoder.get_output_dim() + (
            66 if use_features_post_encoding else 0)
        self.transfer_predictor = torch.nn.Linear(
            in_features=linear_in_features, out_features=2)

    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 title_encoder: Seq2VecEncoder,
                 abstract_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:

        super(AcademicPaperClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size('labels')
        self.title_encoder = title_encoder
        self.abstract_encoder = abstract_encoder
        self.classifier_feedforward = classifier_feedforward
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.loss = torch.nn.CrossEntropyLoss()
        initializer(self)

class SimpleClassifier(Model):
    def __init__(self, vocab: Vocabulary, embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder):
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder
        num_labels = vocab.get_vocab_size("labels")
        self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)
        self.accuracy = CategoricalAccuracy()
        self.macrof1 = FBetaMeasure(average='macro')
        self.microf1 = FBetaMeasure(average='micro')
        self.weightedf1 = FBetaMeasure(average='weighted')

    def forward(self, text: Dict[str, torch.Tensor],
                label: torch.Tensor) -> Dict[str, torch.Tensor]:
        # Shape: (batch_size, num_tokens, embedding_dim)
        embedded_text = self.embedder(text)
        # Shape: (batch_size, num_tokens)
        mask = util.get_text_field_mask(text)
        # Shape: (batch_size, encoding_dim)
        encoded_text = self.encoder(embedded_text, mask)
        # Shape: (batch_size, num_labels)
        logits = self.classifier(encoded_text)
        self.accuracy(logits, label)
        self.macrof1(logits, label)
        self.microf1(logits, label)
        self.weightedf1(logits, label)
        # Shape: (batch_size, num_labels)
        probs = torch.nn.functional.softmax(logits, dim=-1)
        # Shape: (1,)
        loss = torch.nn.functional.cross_entropy(logits, label)
        return {'loss': loss, 'probs': probs}

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        result_macro = self.macrof1.get_metric(reset)
        result_micro = self.microf1.get_metric(reset)
        result_weighted = self.weightedf1.get_metric(reset)
        return {
            "accuracy": self.accuracy.get_metric(reset),
            "macrof1_precision": result_macro["precision"],
            "macrof1_recall": result_macro["recall"],
            "macrof1_fscore": result_macro["fscore"],
            "microf1_precision": result_micro["precision"],
            "microf1_recall": result_micro["recall"],
            "microf1_fscore": result_macro["fscore"],
            "weightedf1_precision": result_weighted["precision"],
            "weightedf1_recall": result_weighted["recall"],
            "weightedf1_fscore": result_weighted["fscore"]
        }
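
get_metrics() above indexes into the dicts returned by FBetaMeasure; when an average is set, each value is a single float keyed 'precision', 'recall' and 'fscore'. A quick standalone sketch (the logits and labels are made up, two classes):

import torch
from allennlp.training.metrics import FBetaMeasure

macrof1 = FBetaMeasure(average='macro')
logits = torch.tensor([[2.0, -1.0], [0.5, 1.5]])  # made-up scores
labels = torch.tensor([0, 1])
macrof1(logits, labels)
print(macrof1.get_metric())  # {'precision': 1.0, 'recall': 1.0, 'fscore': 1.0}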
Example no. 9
class RnnClassifier(Model):    
    def __init__(self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        seq2vec_encoder: Seq2VecEncoder,
        dropout: float = 0.,
        label_namespace: str = 'labels',
        initializer: InitializerApplicator = InitializerApplicator(),
        pretrained_path: Optional[str] = None) -> None:
        super().__init__(vocab)

        self._text_field_embedder = text_field_embedder
        self._seq2vec_encoder = seq2vec_encoder
        self._classifier_input_dim = self._seq2vec_encoder.get_output_dim()

        if dropout:
            self._dropout = nn.Dropout(dropout)
        else:
            self._dropout = lambda x: x

        self._num_labels = vocab.get_vocab_size(namespace=label_namespace)
        self._ln = nn.LayerNorm(self._classifier_input_dim)
        self._classification_layer = nn.Linear(self._classifier_input_dim, self._num_labels)
        self._accuracy = CategoricalAccuracy()
        self._loss = nn.CrossEntropyLoss()

        initializer(self)

        if pretrained_path:
            with open(pretrained_path, 'rb') as f:
                self.load_state_dict(torch.load(f))
        
    def forward(self, tokens, label=None):
        embedded_text = self._text_field_embedder(tokens)
        mask = get_text_field_mask(tokens).float()
        encoded_text = self._dropout(self._seq2vec_encoder(embedded_text, mask=mask))
        encoded_text = self._ln(encoded_text)
        logits = self._classification_layer(encoded_text)
        probs = F.softmax(logits, dim=1)
        output_dict = {'logits': logits, 'probs': probs}
        if label is not None:
            loss = self._loss(logits, label.long().view(-1))
            output_dict['loss'] = loss
            self._accuracy(logits, label)
        return output_dict

    def get_metrics(self, reset=False):
        return {'accuracy': self._accuracy.get_metric(reset)}
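
A checkpoint usable as pretrained_path is just a raw state_dict dump, matching the load_state_dict(torch.load(f)) call in the constructor. A minimal sketch, assuming model is an already trained RnnClassifier and 'rnn_classifier.th' is a placeholder file name:

# Save the trained weights in the format the constructor expects.
with open('rnn_classifier.th', 'wb') as f:
    torch.save(model.state_dict(), f)

# Later, the same file can be loaded at construction time:
# RnnClassifier(vocab, text_field_embedder, seq2vec_encoder,
#               pretrained_path='rnn_classifier.th')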
Example no. 10
    def __init__(
        self,
        pretrained_model: str,
        discriminative_loss_weight: float = 0,
        vocab: Vocabulary = Vocabulary(),
        softmax_over_vocab: bool = False,
        initializer: InitializerApplicator = InitializerApplicator()
    ) -> None:
        super(GNLI, self).__init__(vocab)
        # Check the arguments of `__init__()`.
        assert pretrained_model in ['bart.large']
        assert 0 <= discriminative_loss_weight <= 1

        # Load in BART and extend the embeddings layer by three for the label embeddings.
        self._bart = torch.hub.load('pytorch/fairseq', pretrained_model).model
        self._extend_embeddings()

        # Ignore padding indices when calculating generative loss.
        assert self._bart.encoder.padding_idx == 1
        self._generative_loss_fn = torch.nn.CrossEntropyLoss(
            ignore_index=self._bart.encoder.padding_idx)
        self._discriminative_loss_fn = torch.nn.NLLLoss()
        self._discriminative_loss_weight = discriminative_loss_weight

        self._softmax_over_vocab = softmax_over_vocab
        if self._softmax_over_vocab:
            self.effective_vocab_size = self.vocab_size
        else:
            self.effective_vocab_size = self.vocab_size + self.label_size

        self.metrics = {
            'accuracy': CategoricalAccuracy(),
            'disc_loss': Average(),
            'gen_loss': Average()
        }

        initializer(self)
        number_params = sum([
            numpy.prod(p.size()) for p in list(self.parameters())
            if p.requires_grad
        ])
        logger.info('Number of trainable model parameters: %d', number_params)
Example no. 11
class WordEmbeddingsModel(Model):
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 pos_weight: float,
                 use_features: bool,
                 use_features_post_encoding: bool = False) -> None:
        super().__init__(vocab)
        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.use_features = use_features
        self.use_features_post_encoding = use_features_post_encoding
        self.pos_weight = pos_weight
        self.accuracy = CategoricalAccuracy()
        self.f1 = F1Measure(1)

        linear_in_features = encoder.get_output_dim() + (
            66 if use_features_post_encoding else 0)
        self.transfer_predictor = torch.nn.Linear(
            in_features=linear_in_features, out_features=2)

    def forward(self, tokens: Dict[str, Tensor], features: Tensor,
                is_transferred: Tensor) -> Dict[str, Tensor]:

        mask = get_text_field_mask(tokens)
        embeddings = self.text_field_embedder(tokens)

        if not self.use_features or self.use_features_post_encoding:
            encoded_input = self.encoder(embeddings, mask)
        else:
            encoded_input = self.encoder(
                torch.cat([embeddings, features], dim=-1), mask)

        if self.use_features_post_encoding:
            tag_logits = self.transfer_predictor(
                torch.cat([encoded_input, features], dim=-1))
        else:
            tag_logits = self.transfer_predictor(encoded_input)

        output = {"tag_logits": tag_logits}

        if is_transferred is not None:
            self.accuracy(tag_logits, is_transferred, mask)
            self.f1(tag_logits, is_transferred, mask)
            # overweight positive instances
            weight_mask = self.generate_weight_mask(is_transferred, mask)
            output["loss"] = sequence_cross_entropy_with_logits(
                tag_logits, is_transferred, weight_mask)
        return output

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        precision, recall, f1 = self.f1.get_metric(reset)
        return {
            "acc": self.accuracy.get_metric(reset),
            "prec": precision,
            "recl": recall,
            "f1": f1
        }

    def generate_weight_mask(self, gold: torch.Tensor,
                             mask: torch.Tensor) -> torch.Tensor:
        # With x = self.pos_weight, pick m and p so that negative tokens get
        # weight (0 + p) * m = 1 - x and positive tokens get (1 + p) * m = x.
        # Subtracting the two equations gives m = 2x - 1, then p = (1 - x) / m
        # (this assumes pos_weight != 0.5).
        m = 2 * self.pos_weight - 1
        p = (1 - self.pos_weight) / m
        return (gold.float() + p) * m * mask.float()
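
As a quick numeric check of generate_weight_mask outside the model (a standalone sketch; the tensors and pos_weight=0.75 are made up for illustration):

import torch

pos_weight = 0.75
m = 2 * pos_weight - 1    # 0.5
p = (1 - pos_weight) / m  # 0.5

gold = torch.tensor([[1, 0, 0]])  # one positive token, two negative tokens
mask = torch.tensor([[1, 1, 1]])

weights = (gold.float() + p) * m * mask.float()
print(weights)  # tensor([[0.7500, 0.2500, 0.2500]]) -> positives weighted 3:1 over negatives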