def __init__(self,
                 vocab: Vocabulary,
                 bert_model: Union[str, BertModel],
                 dropout: float = 0.0,
                 num_labels: int = None,
                 index: str = "tokens",
                 label_namespace: str = "labels",
                 trainable: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator()) -> None:
        super().__init__(vocab)

        if isinstance(bert_model, str):
            self.bert_model = PretrainedBertModel.load(bert_model)
        else:
            self.bert_model = bert_model

        # Freeze or unfreeze BERT by setting requires_grad on each parameter;
        # assigning to the module attribute itself has no effect on training.
        for param in self.bert_model.parameters():
            param.requires_grad = trainable

        in_features = self.bert_model.config.hidden_size

        # This variant always uses a single output logit; num_labels is not used here.
        out_features = 1

        self._dropout = torch.nn.Dropout(p=dropout)

        self._classification_layer = torch.nn.Linear(in_features, out_features)
        self._index = index
Example #2
    def __init__(
        self,
        vocab: Vocabulary,
        bert_model: Union[str, BertModel],
        projection_dim: int = 300,
        dropout: float = 0.0,
        normalize_coverage: bool = False,
        label_namespace: str = "labels",
        trainable: bool = True,
        initializer: InitializerApplicator = InitializerApplicator()
    ) -> None:
        super().__init__(vocab)

        if isinstance(bert_model, str):
            self.bert_model = PretrainedBertModel.load(bert_model)
        else:
            self.bert_model = bert_model

        for param in self.bert_model.parameters():
            param.requires_grad = trainable
        self._projection_dim = projection_dim
        in_features = self._projection_dim
        self._normalize_coverage = normalize_coverage
        self._dropout = torch.nn.Dropout(p=dropout)

        self._classification_layer = torch.nn.Linear(in_features, 1)
        self._projection_layer = torch.nn.Sequential(
            torch.nn.Linear(self.bert_model.config.hidden_size,
                            self._projection_dim), torch.nn.ReLU())

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self._classification_layer)
Example #3
    def __init__(
        self,
        vocab: Vocabulary,
        bert_model: Union[str, BertModel],
        dropout: float = 0.0,
        num_labels: int = None,
        index: str = "bert",
        label_namespace: str = "labels",
        trainable: bool = True,
        initializer: InitializerApplicator = InitializerApplicator()
    ) -> None:
        super().__init__(vocab)

        if isinstance(bert_model, str):
            self.bert_model = PretrainedBertModel.load(bert_model)
        else:
            self.bert_model = bert_model
        for param in self.bert_model.parameters():
            param.requires_grad = trainable

        in_features = self.bert_model.config.hidden_size
        if num_labels:
            out_features = num_labels
        else:
            out_features = vocab.get_vocab_size(label_namespace)

        self._dropout = torch.nn.Dropout(p=dropout)
        self._tagger_layer = torch.nn.Linear(in_features, out_features)
        self._span_f1 = SpanBasedF1Measure(vocab,
                                           label_namespace,
                                           label_encoding='BIO')
        self._loss = torch.nn.CrossEntropyLoss()
        self._index = index
        initializer(self._tagger_layer)
Example #4
    def init_bert_model(self, bert_model: Union[str, BertModel], trainable: bool) -> BertModel:
        if isinstance(bert_model, str):
            bert_model = PretrainedBertModel.load(bert_model)

        for param in bert_model.parameters():
            param.requires_grad = trainable

        return bert_model
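A note on Example #4: it factors the load-and-freeze boilerplate that the constructors above repeat (the isinstance check plus the requires_grad loop) into one reusable helper. A rough sketch of how a constructor could call it follows; the class context and the single-logit head are hypothetical, not part of the original example.

    # Hypothetical constructor that delegates to init_bert_model; illustration only.
    def __init__(self,
                 vocab: Vocabulary,
                 bert_model: Union[str, BertModel],
                 trainable: bool = True) -> None:
        super().__init__(vocab)
        # One call covers both the model-name string and the preloaded-model cases
        # and toggles requires_grad on every BERT parameter.
        self.bert_model = self.init_bert_model(bert_model, trainable)
        self._classification_layer = torch.nn.Linear(
            self.bert_model.config.hidden_size, 1)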
Example #5
    def __init__(self, vocab: Vocabulary, bert_model: Union[str, BertModel]):
        super().__init__(vocab)

        if isinstance(bert_model, str):
            bert_model = PretrainedBertModel.load(bert_model)
        self.bert_for_domain_classification = BertForClassification(vocab, bert_model, label_namespace='domain_labels')
        self.bert_for_intent_classification = BertForClassification(vocab, bert_model, label_namespace='intent_labels')
        self.bert_for_slot_filling = BertForTagger(vocab, bert_model, label_namespace='slots_labels')

        self._accuracy = SentenceAccuracy()
Example #6
    def _get_bert_word_embedder(self):
        pretrained_model = self.bert_file_path
        bert_model = PretrainedBertModel.load(pretrained_model, cache_model=False)
        for param in bert_model.parameters():
            param.requires_grad = self.configuration['train_bert']
        bert_embedder = BertEmbedder(bert_model=bert_model, top_layer_only=True)

        bert_word_embedder: TextFieldEmbedder = BasicTextFieldEmbedder({"tokens": bert_embedder},
                                                                       # we'll be ignoring masks so we'll need to set this to True
                                                                       allow_unmatched_keys=True)
        return bert_word_embedder
Example #7
    def __init__(self,
                 pretrained_model: Union[str, BertModel],
                 requires_grad: bool = True) -> None:
        super().__init__()

        if isinstance(pretrained_model, str):
            model = PretrainedBertModel.load(pretrained_model)
        else:
            model = pretrained_model

        self.pooler = model.pooler
        for param in self.pooler.parameters():
            param.requires_grad = requires_grad
        self._embedding_dim = model.config.hidden_size
Example #8
    def __init__(self,
                 pretrained_model: str,
                 max_pieces: int = 512,
                 requires_grad: bool = False,
                 top_layer_only: bool = False) -> None:
        model = PretrainedBertModel.load(pretrained_model)

        for param in model.parameters():
            param.requires_grad = requires_grad

        super().__init__(bert_model=model,
                         max_pieces=max_pieces,
                         top_layer_only=top_layer_only)
Example #9
    def __init__(self,
                 bert_model: Union[str, BertModel],
                 dropout: float = 0.0,
                 trainable: bool = True):
        super().__init__()

        if isinstance(bert_model, str):
            self.bert_model = PretrainedBertModel.load(bert_model)
        else:
            self.bert_model = bert_model

        self._dropout = torch.nn.Dropout(p=dropout)
        self._index = "tokens"
        self._train_layers = 3
        if trainable:
            self.fine_tune()
Example #10
    def __init__(self,
                 embedding_file,
                 vocab=None,
                 dropout: float = 0.4,
                 trainable: bool = True):
        super().__init__()

        if isinstance(embedding_file, str):
            self.bert_model = PretrainedBertModel.load(embedding_file)
        else:
            self.bert_model = embedding_file

        self._dropout = torch.nn.Dropout(p=dropout)
        self._index = "tokens"
        self._train_layers = 3
        if trainable:
            self.fine_tune()
Example #11
    def __init__(self,
                 pretrained_model: Union[str, BertModel],
                 requires_grad: bool = True,
                 dropout: float = 0.0) -> None:
        super().__init__()

        if isinstance(pretrained_model, str):
            model = PretrainedBertModel.load(pretrained_model)
        else:
            model = pretrained_model

        self._dropout = torch.nn.Dropout(p=dropout)

        self.pooler = model.pooler
        for param in self.pooler.parameters():
            param.requires_grad = requires_grad
        self._embedding_dim = model.config.hidden_size
Example #12
    def __init__(self,
                 pretrained_model: Union[str, BertModel],
                 dropout: float = 0.0) -> None:
        super().__init__()

        if isinstance(pretrained_model, str):
            model = PretrainedBertModel.load(pretrained_model)
        else:
            model = pretrained_model

        self._dropout = torch.nn.Dropout(p=dropout)

        self.bert = model
        self._embedding_dim = model.config.hidden_size

        self.dense = nn.Linear(4 * self._embedding_dim, self._embedding_dim)
        self.activation = nn.Tanh()
Example #13
    def test_caching(self):
        model1 = PretrainedBertModel.load("testing caching")
        model2 = PretrainedBertModel.load("testing caching")
        assert model1 is model2

        model3 = PretrainedBertModel.load("testing not caching", cache_model=False)
        model4 = PretrainedBertModel.load("testing not caching", cache_model=False)
        assert model3 is not model4

        model5 = PretrainedBertModel.load("name1")
        model6 = PretrainedBertModel.load("name2")
        assert model5 is not model6
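The caching test above pins down the contract of PretrainedBertModel.load: repeated calls with the same name return the same object, cache_model=False bypasses the cache, and different names yield different objects. Below is a minimal sketch of a loader with that behaviour; the class name CachingBertLoader is made up here, the BertModel import assumes the pytorch_pretrained_bert package these snippets appear to use, and the real AllenNLP implementation may differ in details.

from typing import Dict

from pytorch_pretrained_bert.modeling import BertModel


class CachingBertLoader:
    """Name-keyed cache of loaded BERT models (sketch, not the library source)."""

    _cache: Dict[str, BertModel] = {}

    @classmethod
    def load(cls, model_name: str, cache_model: bool = True) -> BertModel:
        # Reuse a previously loaded model for the same name.
        if model_name in cls._cache:
            return cls._cache[model_name]
        model = BertModel.from_pretrained(model_name)
        # Only remember the model when caching is requested.
        if cache_model:
            cls._cache[model_name] = model
        return model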
Example #14
    def __init__(
        self,
        bert_model: Union[str, BertModel],
        requires_grad: bool = True,
        index: str = "bert",
    ) -> None:
        super().__init__()

        if isinstance(bert_model, str):
            self.bert_model = PretrainedBertModel.load(bert_model)
        else:
            self.bert_model = bert_model

        for param in self.bert_model.parameters():
            param.requires_grad = requires_grad

        self._embedding_dim = self.bert_model.config.hidden_size
        self._index = index
Example #15
    def __init__(
        self,
        vocab: Vocabulary,
        bert_model: Union[str, BertModel],
        dropout: float = 0.0,
        num_labels: int = None,
        index: str = "bert",
        label_namespace: str = "labels",
        trainable: bool = True,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        super().__init__(vocab, regularizer)

        if isinstance(bert_model, str):
            self.bert_model = PretrainedBertModel.load(bert_model)
        else:
            self.bert_model = bert_model

        for param in self.bert_model.parameters():
            param.requires_grad = trainable

        in_features = self.bert_model.config.hidden_size

        self._label_namespace = label_namespace

        if num_labels:
            out_features = num_labels
        else:
            out_features = vocab.get_vocab_size(
                namespace=self._label_namespace)

        self._dropout = torch.nn.Dropout(p=dropout)

        self._classification_layer = torch.nn.Linear(in_features, out_features)
        self._accuracy = CategoricalAccuracy()

        # added by jlk: also report F1 for the positive class
        self._f1score = F1Measure(positive_label=1)

        self._loss = torch.nn.CrossEntropyLoss()
        self._index = index
        initializer(self._classification_layer)
Example #16
    def _get_bert_word_embedder(self):
        # bert_embedder = PretrainedBertEmbedder(
        #     pretrained_model=self.bert_file_path,
        #     top_layer_only=True,  # conserve memory
        #     requires_grad=(not self.configuration['fixed'])
        # )

        pretrained_model = self.bert_file_path
        bert_model = PretrainedBertModel.load(pretrained_model,
                                              cache_model=False)
        for param in bert_model.parameters():
            param.requires_grad = (not self.configuration['fixed'])
        bert_embedder = BertEmbedder(bert_model=bert_model,
                                     top_layer_only=True)

        bert_word_embedder: TextFieldEmbedder = BasicTextFieldEmbedder(
            {"bert": bert_embedder},
            # we'll be ignoring masks so we'll need to set this to True
            allow_unmatched_keys=True)
        bert_word_embedder.to(self.configuration['device'])
        return bert_word_embedder
Example #17
    def __init__(self,
                 vocab: Vocabulary,
                 bert_model: Union[str, BertModel],
                 dropout: float = 0.0,
                 trainable: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator()) -> None:
        super().__init__(vocab)

        if isinstance(bert_model, str):
            self.bert_model = PretrainedBertModel.load(bert_model)
        else:
            self.bert_model = bert_model

        for param in self.bert_model.parameters():
            param.requires_grad = trainable
        in_features = self.bert_model.config.hidden_size

        self._dropout = torch.nn.Dropout(p=dropout)

        self._classification_layer = torch.nn.Linear(in_features, 1)

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self._classification_layer)
Example #18
    def __init__(
        self,
        vocab: Vocabulary,
        bert_model: Union[str, BertModel],
        dropout: float = 0.0,
        num_labels: int = None,
        index: str = "bert",
        label_namespace: str = "labels",
        trainable: bool = True,
        initializer: InitializerApplicator = InitializerApplicator()
    ) -> None:
        super().__init__(vocab)

        if isinstance(bert_model, str):
            self.bert_model = PretrainedBertModel.load(bert_model)
        else:
            self.bert_model = bert_model

        for param in self.bert_model.parameters():
            param.requires_grad = trainable

        in_features = self.bert_model.config.hidden_size

        self._label_namespace = label_namespace

        if num_labels:
            out_features = num_labels
        else:
            out_features = vocab.get_vocab_size(
                namespace=self._label_namespace)

        self._dropout = torch.nn.Dropout(p=dropout)

        self._classification_layer = torch.nn.Linear(in_features, out_features)
        self._loss = torch.nn.BCEWithLogitsLoss()
        self._index = index
        initializer(self._classification_layer)
Example #19
    def __init__(
        self,
        vocab: Vocabulary,
        bert_model: Union[str, BertModel],
        span_extractor: SpanExtractor,
        tree_mapper: TreeMapper,
        domain_utils: DomainUtils,
        is_weak_supervision: bool,
        feedforward: FeedForward = None,
        dropout: float = 0.0,
        num_labels: int = None,
        index: str = "bert",
        label_namespace: str = "labels",
        trainable: bool = True,
        initializer: InitializerApplicator = InitializerApplicator(),
        denotation_based_metric: Metric = None,
        token_based_metric: Metric = None,
        **kwargs,
    ) -> None:
        super().__init__(vocab, **kwargs)

        if isinstance(bert_model, str):
            self.bert_model = PretrainedBertModel.load(bert_model)
        else:
            self.bert_model = bert_model

        for param in self.bert_model.parameters():
            param.requires_grad = trainable

        in_features = self.bert_model.config.hidden_size

        self._label_namespace = label_namespace

        self.span_extractor = span_extractor
        self.feedforward_layer = TimeDistributed(feedforward) if feedforward else None
        self.num_classes = self.vocab.get_vocab_size("labels")
        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        else:
            output_dim = span_extractor.get_output_dim()
        self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_classes))

        if num_labels:
            out_features = num_labels
        else:
            out_features = vocab.get_vocab_size(namespace=self._label_namespace)

        self._dropout = torch.nn.Dropout(p=dropout)

        self._tree_mapper = tree_mapper

        labels = self.vocab.get_index_to_token_vocabulary(self._label_namespace)
        grammar = Grammar(labels)
        self._cky = CKY(grammar, tree_mapper, domain_utils)

        use_lexicon = True
        if use_lexicon:
            self.zero_shot_extractor = ZeroShotExtractor(labels, domain_utils)
            self._sim_weight = torch.nn.Parameter(
                torch.ones([1], dtype=torch.float32, requires_grad=True))

        self._classification_layer = torch.nn.Linear(in_features, out_features)
        self._accuracy = CategoricalAccuracy()
        self._accuracy_all_no_span = CategoricalAccuracy()
        self._fmeasure = F1Measure(positive_label=1)
        self._denotation_based_metric = denotation_based_metric
        self._token_based_metric = token_based_metric
        self._loss = torch.nn.CrossEntropyLoss()
        self._index = index
        initializer(self._classification_layer)

        self._epoch_counter = 0

        self._is_weak_supervision = is_weak_supervision
        if self._is_weak_supervision:
            self._weak_supervision_acc = WeakSupervisionAccuracy()
            self._label_preparer = LabelsPreparer(self.vocab.get_index_to_token_vocabulary(self._label_namespace))

        self._sets_f1_metric = SetsF1()
        self._compute_spans_f1 = False