Example #1
    def __init__(self, general_embeddings, domain_embeddings, input_size, hidden_size, aspect_tag_classes,
                 polarity_tag_classes, k, dropout=0.5):
        super(DualCrossSharedLSTM, self).__init__()
        # Frozen pretrained embeddings; from_pretrained is a classmethod, so
        # call it directly rather than on a constructed nn.Embedding instance.
        self.general_embedding = nn.Embedding.from_pretrained(
            general_embeddings, freeze=True, padding_idx=0)
        self.domain_embedding = nn.Embedding.from_pretrained(
            domain_embeddings, freeze=True, padding_idx=0)

        self.dropout = dropout

        self.aspect_rnn1 = DynamicRNN(input_size, hidden_size, num_layers=1, batch_first=True, bidirectional=True)
        self.polarity_rnn1 = DynamicRNN(input_size, hidden_size, num_layers=1, batch_first=True, bidirectional=True)

        self.csu = Cross_Shared_Unit(k, 2 * hidden_size)

        self.aspect_rnn2 = DynamicRNN(hidden_size * 2, hidden_size, num_layers=1, batch_first=True, bidirectional=True)
        self.polarity_rnn2 = DynamicRNN(hidden_size * 2, hidden_size, num_layers=1, batch_first=True, bidirectional=True)

        self.aspect_hidden2tag = nn.Linear(2 * hidden_size, aspect_tag_classes)
        self.polarity_hidden2tag = nn.Linear(2 * hidden_size, polarity_tag_classes)

        self.aspect_crf = ConditionalRandomField(aspect_tag_classes)
        self.polarity_crf = ConditionalRandomField(polarity_tag_classes)

        self.dropout_layer = nn.Dropout(dropout)
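
A hedged sketch of how these two CRF heads are typically combined during training, assuming AllenNLP's ConditionalRandomField API in which calling the module returns the log-likelihood of the gold tag sequences (an illustration, not the authors' forward pass):

    def joint_crf_loss(self, aspect_logits, polarity_logits, aspect_tags, polarity_tags, mask):
        # Each CRF returns the summed log-likelihood of its gold tag sequences.
        aspect_nll = -self.aspect_crf(aspect_logits, aspect_tags, mask)
        polarity_nll = -self.polarity_crf(polarity_logits, polarity_tags, mask)
        return aspect_nll + polarity_nll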
Example #2
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        use_sep: bool = True,
        with_crf: bool = False,
        self_attn: Seq2SeqEncoder = None,
        bert_dropout: float = 0.1,
        sci_sum: bool = False,
        additional_feature_size: int = 0,
    ) -> None:
        super(SeqClassificationModel, self).__init__(vocab)

        self.text_field_embedder = text_field_embedder
        self.vocab = vocab
        self.use_sep = use_sep
        self.with_crf = with_crf
        self.sci_sum = sci_sum
        self.self_attn = self_attn
        self.additional_feature_size = additional_feature_size

        self.dropout = torch.nn.Dropout(p=bert_dropout)

        # define loss
        if self.sci_sum:
            self.loss = torch.nn.MSELoss(
                reduction='none')  # labels are rouge scores
            self.labels_are_scores = True
            self.num_labels = 1
        else:
            self.loss = torch.nn.CrossEntropyLoss(ignore_index=-1,
                                                  reduction='none')
            self.labels_are_scores = False
            self.num_labels = self.vocab.get_vocab_size(namespace='labels')
            # define accuracy metrics
            self.label_accuracy = CategoricalAccuracy()
            self.all_f1_metrics = FBetaMeasure(beta=1.0, average='micro')
            self.label_f1_metrics = {}

            # define F1 metrics per label
            for label_index in range(self.num_labels):
                label_name = self.vocab.get_token_from_index(
                    namespace='labels', index=label_index)
                self.label_f1_metrics[label_name] = F1Measure(label_index)

        encoded_sentence_dim = text_field_embedder._token_embedders['bert'].output_dim

        ff_in_dim = encoded_sentence_dim if self.use_sep else self_attn.get_output_dim()
        ff_in_dim += self.additional_feature_size

        self.time_distributed_aggregate_feedforward = TimeDistributed(
            Linear(ff_in_dim, self.num_labels))

        if self.with_crf:
            self.crf = ConditionalRandomField(
                self.num_labels,
                constraints=None,
                include_start_end_transitions=True)
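
The per-label F1Measure objects registered above are usually surfaced through the model's get_metrics(). A hedged sketch of that method (not the authors' code; older AllenNLP versions return a (precision, recall, f1) tuple from F1Measure.get_metric, newer ones a dict):

    def get_metrics(self, reset: bool = False):
        metrics = {"accuracy": self.label_accuracy.get_metric(reset)}
        for label_name, metric in self.label_f1_metrics.items():
            precision, recall, f1 = metric.get_metric(reset)  # tuple-style API
            metrics[f"{label_name}_F1"] = f1
        return metrics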
Example #3
    def __init__(
        self,
        input_dim,
        num_tags,
        low_val=-5,
        high_val=5,
        incl_start_end=True,
        name=None,
    ):
        super(SpanScorerCRF, self).__init__()

        self.input_dim = input_dim
        self.num_tags = num_tags
        self.low_val = low_val
        self.high_val = high_val
        self.incl_start_end = incl_start_end
        self.name = name

        self.span_to_seq, self.seq_to_span = label_map(num_tags)

        self.num_tags_seq = len(self.seq_to_span)
        self.num_tags_span = len(self.span_to_seq)

        # Linear projection layer
        self.projection = nn.Linear(input_dim, self.num_tags_seq)

        # Create event-specific CRF
        self.crf = ConditionalRandomField(
            num_tags=self.num_tags_seq,
            include_start_end_transitions=incl_start_end)
Example #4
    def __init__(self, 
                 vocab: Vocabulary,
                 bert_embedder: Optional[PretrainedBertEmbedder] = None,
                 encoder: Optional[Seq2SeqEncoder] = None,
                 dropout: Optional[float] = None,
                 use_crf: bool = True) -> None:
        super().__init__(vocab)

        if bert_embedder:
            self.use_bert = True
            self.bert_embedder = bert_embedder
        else:
            self.use_bert = False
            self.basic_embedder = BasicTextFieldEmbedder({
                "tokens": Embedding(vocab.get_vocab_size(namespace="tokens"), 1024)
            })
            self.rnn = Seq2SeqEncoder.from_params(Params({     
                "type": "lstm",
                "input_size": 1024,
                "hidden_size": 512,
                "bidirectional": True,
                "batch_first": True
            }))

        self.encoder = encoder

        if encoder:
            hidden2tag_in_dim = encoder.get_output_dim()
        else:
            hidden2tag_in_dim = bert_embedder.get_output_dim()
        self.hidden2tag = TimeDistributed(torch.nn.Linear(
            in_features=hidden2tag_in_dim,
            out_features=vocab.get_vocab_size("labels")))
        
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        
        self.use_crf = use_crf
        if use_crf:
            crf_constraints = allowed_transitions(
                constraint_type="BIO",
                labels=vocab.get_index_to_token_vocabulary("labels")
            )
            self.crf = ConditionalRandomField(
                num_tags=vocab.get_vocab_size("labels"),
                constraints=crf_constraints,
                include_start_end_transitions=True
            )
        
        self.f1 = SpanBasedF1Measure(vocab, 
                                     tag_namespace="labels",
                                     ignore_classes=["news/type","negation",
                                                     "demonstrative_reference",
                                                     "timer/noun","timer/attributes"],
                                     label_encoding="BIO")
Example #5
 def __init__(self, model_path, vocab: Vocabulary):
     super().__init__(vocab)
     self.pretrained_tokenizer = BertForPreTraining.from_pretrained(
         model_path)
     config = BertConfig.from_pretrained(model_path)
     bert_model = BertForPreTraining(config)
     self.bert = bert_model.bert
     tags = vocab.get_index_to_token_vocabulary("tags")
     num_tags = len(tags)
     constraints = allowed_transitions(constraint_type="BMES", labels=tags)
     self.projection = torch.nn.Linear(config.hidden_size, num_tags)
     self.crf = ConditionalRandomField(num_tags=num_tags,
                                       constraints=constraints,
                                       include_start_end_transitions=False)
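
allowed_transitions builds the whitelist of (from_index, to_index) pairs the constrained CRF may use. A small illustration for a BMES tag set (labels here are assumed for demonstration; in AllenNLP, the indices num_tags and num_tags + 1 stand for the implicit START and END states):

    labels = {0: "B-W", 1: "M-W", 2: "E-W", 3: "S-W"}
    transitions = allowed_transitions(constraint_type="BMES", labels=labels)
    # Contains pairs such as (0, 1) for B-W -> M-W and (0, 2) for B-W -> E-W,
    # but not (0, 3): a begin tag cannot be followed by a single-token tag.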
Example #6
 def __init__(self, vocab_size, labels_num, tag2id, embedding_size=32, single_backbone_kwargs=None,
              context_backbone_kwargs=None):
     super().__init__()
     # Avoid mutable default arguments
     if single_backbone_kwargs is None:
         single_backbone_kwargs = {}
     if context_backbone_kwargs is None:
         context_backbone_kwargs = {}
     self.embedding_size = embedding_size
     self.char_embeddings = nn.Embedding(vocab_size, embedding_size, padding_idx=0)
     self.single_token_backbone = StackedConv1d(embedding_size, **single_backbone_kwargs)
     self.context_backbone = StackedConv1d(embedding_size, **context_backbone_kwargs)
     self.global_pooling = nn.AdaptiveMaxPool1d(1)
     self.out = nn.Conv1d(embedding_size, labels_num, 1)
     self.labels_num = labels_num
     state_transitions_constraints = get_state_transitions_constraints(tag2id)
     self.crf = ConditionalRandomField(len(tag2id), constraints=state_transitions_constraints)
Example #7
    def __init__(self, config):
        super(RobertaForSequentialSequenceClassification,
              self).__init__(config)
        self.num_labels = config.num_labels

        self.roberta = RobertaModel(config)
        self.classifier = RobertaClassificationHead(config)
        self.sigm = nn.Sigmoid()

        ### SSC attributes
        self.use_sep = True
        self.with_crf = False
        self.sci_sum = False
        self.dropout = torch.nn.Dropout(p=0.1)

        # define loss
        if self.sci_sum:
            self.loss = torch.nn.MSELoss(
                reduction='none')  # labels are rouge scores
            self.labels_are_scores = True
            self.num_labels = 1
        else:
            self.loss = torch.nn.CrossEntropyLoss(
                ignore_index=-1,
                reduction='none')  #weight=torch.tensor([.20, .80]),
            self.labels_are_scores = False
            self.num_labels = 2
            # define accuracy metrics
            self.label_accuracy = CategoricalAccuracy()
            self.label_f1_metrics = {}

            # define F1 metrics per label
            self.label_vocab = {0: 0, 1: 1}
            for label_index in range(self.num_labels):
                label_name = self.label_vocab[label_index]
                self.label_f1_metrics[label_name] = F1Measure(label_index)

        encoded_sentence_dim = 768

        ff_in_dim = encoded_sentence_dim  #if self.use_sep else self_attn.get_output_dim()
        #ff_in_dim += self.additional_feature_size

        self.time_distributed_aggregate_feedforward = TimeDistributed(
            Linear(ff_in_dim, self.num_labels))

        if self.with_crf:
            self.crf = ConditionalRandomField(
                self.num_labels,
                constraints=None,
                include_start_end_transitions=True)
Example #8
    def __init__(
        self,
        vocab: Vocabulary,
        bert_model: str,
        dropout: float = 0.0,
        requires_grad: str = "none",
        use_crf: bool = False,
        pos_weight: float = 1.0,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ):

        super(BertMiddleModel, self).__init__(vocab, regularizer)
        self._vocabulary = vocab
        self._bert_model = BertModel.from_pretrained(bert_model)
        self._dropout = torch.nn.Dropout(p=dropout)
        self._classification_layer = torch.nn.Linear(
            self._bert_model.config.hidden_size, 2)

        self._use_crf = use_crf

        self._pos_weight = torch.Tensor([1 / (1 - pos_weight), 1 / pos_weight])
        self._pos_weight = torch.nn.Parameter(self._pos_weight /
                                              self._pos_weight.min())
        self._pos_weight.requires_grad = False

        if use_crf:
            self._crf = ConditionalRandomField(num_tags=2)

        self.embedding_layers = ["BertEmbedding"]

        if requires_grad in ["none", "all"]:
            for param in self._bert_model.parameters():
                param.requires_grad = requires_grad == "all"
        else:
            model_name_regexes = requires_grad.split(",")
            for name, param in self._bert_model.named_parameters():
                found = any([regex in name for regex in model_name_regexes])
                param.requires_grad = found

        for n, v in self._bert_model.named_parameters():
            if n.startswith("classifier"):
                v.requires_grad = True

        self._token_prf = F1Measure(1)

        initializer(self)
Example #9
    def __init__(self, args):
        super(BiLSTM_CRF, self).__init__()

        self.name = args.name
        self.hidden_size = args.hidden_size
        self.num_tags = args.num_tags
        self.embedding = nn.Embedding(args.embed_size, args.embed_dim)

        self.crf = ConditionalRandomField(self.num_tags, args.constraints)
        self.lstm = nn.LSTM(input_size=args.embed_dim,
                            hidden_size=args.hidden_size // 2,
                            num_layers=1,
                            bidirectional=True)
        self.linear = nn.Linear(self.hidden_size, self.num_tags)

        self.device = args.device
        self.dropout = nn.Dropout(args.dropout)
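
Everything this constructor needs comes from a single args object. A hedged construction sketch with hypothetical hyperparameter values, using types.SimpleNamespace to stand in for parsed command-line arguments:

    from types import SimpleNamespace

    args = SimpleNamespace(name="bilstm_crf", hidden_size=256, num_tags=9,
                           embed_size=10000,  # vocabulary size for nn.Embedding
                           embed_dim=100, constraints=None,
                           device="cpu", dropout=0.5)
    model = BiLSTM_CRF(args)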
Example #10
    def __init__(self, num_input_features: int, hidden_size: int, num_layers: int,
                 dropout_gru: float, bidirectional: bool, tags: dict, dropout_FCN: float):
        """
        num_input_features: number of input features
        hidden_size: number of hidden features (the outputs will also have hidden_size features)
        num_layers: number of stacked GRU layers
        dropout_gru: dropout rate inside the GRU
        bidirectional: if True, use a bidirectional GRU
        tags: dict[int, str], e.g. {0: 'I', 1: 'B', 2: 'O', 3: '<PAD>'}
        dropout_FCN: dropout rate for the fully connected layers
        """
        super().__init__()
        self.gru = nn.GRU(input_size=num_input_features, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True, dropout=dropout_gru,
                          bidirectional=bidirectional)

        all_transition = allowed_transitions('BIO', tags)
        #self.crf = CRF(num_tags=len(tags), batch_first= True)
        self.linear = nn.Linear(hidden_size * 2, hidden_size)
        self.BN = nn.BatchNorm1d(num_layers)
        self.linear2 = nn.Linear(hidden_size, len(tags))
        self.BN2 = nn.BatchNorm1d(num_layers)
        self.crf = ConditionalRandomField(len(tags), all_transition)
        self.dropout = nn.Dropout(dropout_FCN)
Example #11
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        seq2seq_encoder: Seq2SeqEncoder,
        feedforward_encoder: Seq2SeqEncoder,
        dropout: float = 0.0,
        use_crf: bool = False,
        pos_weight: float = 1.0,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ):

        super(BertMiddleModel, self).__init__(vocab, regularizer)
        self._vocabulary = vocab
        self._text_field_embedder = text_field_embedder
        self._seq2seq_encoder = seq2seq_encoder
        self._dropout = torch.nn.Dropout(p=dropout)

        self._feedforward_encoder = feedforward_encoder
        self._classifier_input_dim = feedforward_encoder.get_output_dim()

        self._classification_layer = torch.nn.Linear(
            self._classifier_input_dim, 2)

        self._use_crf = use_crf

        self._pos_weight = torch.Tensor([1 / (1 - pos_weight), 1 / pos_weight])
        self._pos_weight = torch.nn.Parameter(self._pos_weight /
                                              self._pos_weight.min())
        self._pos_weight.requires_grad = False

        if use_crf:
            self._crf = ConditionalRandomField(num_tags=2)

        self._token_prf = F1Measure(1)

        initializer(self)
Example #12
    def __init__(self, vocab: Vocabulary, embedding_dim=300, embedder_type=None, bert_trainable=True, **kwargs):
        super().__init__(vocab)
        for k in kwargs:
            self.__setattr__(k, kwargs[k])

        text_field_embedder = get_embeddings(embedder_type, self.vocab, embedding_dim, bert_trainable)
        embedding_dim = text_field_embedder.get_output_dim()

        encoder = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(embedding_dim, self.num_rnn_units, batch_first=True, bidirectional=True, dropout=self.dropout_rate))

        self.label_namespace = label_namespace = 'ner_bio_labels'
        self.num_tags = self.vocab.get_vocab_size(label_namespace)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.dropout = torch.nn.Dropout(self.dropout_rate)

        output_dim = self.encoder.get_output_dim()
        self.tag_projection_layer = TimeDistributed(Linear(output_dim,
                                                           self.num_tags))

        self.label_encoding = label_encoding = 'BIO'
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(self.label_encoding, labels)

        self.include_start_end_transitions = True
        self.crf = ConditionalRandomField(
            self.num_tags, constraints,
            include_start_end_transitions=True
        )

        self._f1_metric = SpanBasedF1Measure(self.vocab,
                                             tag_namespace=label_namespace,
                                             label_encoding=label_encoding)
        self._verbose_metrics = False
Example #13
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 relation_scorer: RelationScorer,
                 ner_tag_namespace: str = 'tags',
                 evaluated_ner_labels: List[str] = None,
                 re_loss_weight: float = 1.0,
                 ner_tag_embedder: TokenEmbedder = None,
                 use_aux_ner_labels: bool = False,
                 aux_coarse_namespace: str = 'coarse_tags',
                 aux_modifier_namespace: str = 'modifier_tags',
                 aux_loss_weight: float = 1.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab=vocab, regularizer=regularizer)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder

        # NER subtask 2
        self._ner_label_encoding = 'BIO'
        self._ner_tag_namespace = ner_tag_namespace
        ner_input_dim = self.encoder.get_output_dim()
        num_ner_tags = self.vocab.get_vocab_size(ner_tag_namespace)
        self.tag_projection_layer = TimeDistributed(
            Linear(ner_input_dim, num_ner_tags))

        self._use_aux_ner_labels = use_aux_ner_labels
        if self._use_aux_ner_labels:
            self._coarse_tag_namespace = aux_coarse_namespace
            self._num_coarse_tags = self.vocab.get_vocab_size(
                self._coarse_tag_namespace)
            self._coarse_projection_layer = TimeDistributed(
                Linear(ner_input_dim, self._num_coarse_tags))
            self._modifier_tag_namespace = aux_modifier_namespace
            self._num_modifier_tags = self.vocab.get_vocab_size(
                self._modifier_tag_namespace)
            self._modifier_projection_layer = TimeDistributed(
                Linear(ner_input_dim, self._num_modifier_tags))
            self._coarse_acc = CategoricalAccuracy()
            self._modifier_acc = CategoricalAccuracy()
            self._aux_loss_weight = aux_loss_weight

        self.ner_accuracy = CategoricalAccuracy()
        if evaluated_ner_labels is None:
            ignored_classes = None
        else:
            assert self._ner_label_encoding == 'BIO', 'expected BIO encoding'
            all_ner_tags = self.vocab.get_token_to_index_vocabulary(
                ner_tag_namespace).keys()
            ner_tag_classes = set(
                [bio_tag[2:] for bio_tag in all_ner_tags if len(bio_tag) > 2])
            ignored_classes = list(
                set(ner_tag_classes).difference(evaluated_ner_labels))
        self.ner_f1 = SpanBasedF1Measure(
            vocabulary=vocab,
            tag_namespace=ner_tag_namespace,
            label_encoding=self._ner_label_encoding,
            ignore_classes=ignored_classes)

        # Use constrained crf decoding with the BIO labeling scheme
        ner_labels = self.vocab.get_index_to_token_vocabulary(
            ner_tag_namespace)
        constraints = allowed_transitions(self._ner_label_encoding, ner_labels)

        self.crf = ConditionalRandomField(num_ner_tags,
                                          constraints,
                                          include_start_end_transitions=True)

        # RE subtask 3
        self.ner_tag_embedder = ner_tag_embedder
        self.relation_scorer = relation_scorer
        self._re_loss_weight = re_loss_weight

        initializer(self)
Example #14
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 feedforward: Optional[FeedForward] = None,
                 label_encoding: Optional[str] = None,
                 include_start_end_transitions: bool = True,
                 attention=None,
                 constrain_crf_decoding: bool = None,
                 calculate_span_f1: bool = None,
                 dropout: Optional[float] = None,
                 verbose_metrics: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self._verbose_metrics = verbose_metrics
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self._feedforward = feedforward

        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        else:
            output_dim = self.encoder.get_output_dim()
        self.tag_projection_layer = TimeDistributed(Linear(output_dim,
                                                           self.num_tags))

        # If constrain_crf_decoding and calculate_span_f1 are not provided
        # (i.e., they are None), set them to True if label_encoding is
        # provided and to False if it is not.
        if constrain_crf_decoding is None:
            constrain_crf_decoding = label_encoding is not None
        if calculate_span_f1 is None:
            calculate_span_f1 = label_encoding is not None

        self.label_encoding = label_encoding
        if constrain_crf_decoding:
            if not label_encoding:
                raise ConfigurationError("constrain_crf_decoding is True, but "
                                         "no label_encoding was specified.")
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            constraints = allowed_transitions(label_encoding, labels)
        else:
            constraints = None

        self.include_start_end_transitions = include_start_end_transitions
        self.crf = ConditionalRandomField(
                self.num_tags, constraints,
                include_start_end_transitions=include_start_end_transitions
        )

        self.metrics = {
                "accuracy": CategoricalAccuracy(),
                "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.calculate_span_f1 = calculate_span_f1
        if calculate_span_f1:
            if not label_encoding:
                raise ConfigurationError("calculate_span_f1 is True, but "
                                         "no label_encoding was specified.")
            self._f1_metric = SpanBasedF1Measure(vocab,
                                                 tag_namespace=label_namespace,
                                                 label_encoding=label_encoding)

        check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        if feedforward is not None:
            check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(),
                                   "encoder output dim", "feedforward input dim")


        initializer(self)
Example #15
def default_crf() -> ConditionalRandomField:
    include_start_end_transitions = True
    constraints = allowed_transitions('BIO', {0: 'O', 1: 'B', 2: 'I'})
    return ConditionalRandomField(3, constraints,
                                  include_start_end_transitions)
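
A usage sketch for the CRF this helper returns, assuming AllenNLP's API in which calling the module yields the log-likelihood of the gold tags and viterbi_tags performs constrained decoding (shapes are illustrative):

    import torch

    crf = default_crf()
    batch_size, seq_len, num_tags = 2, 5, 3
    logits = torch.randn(batch_size, seq_len, num_tags)  # emission scores from an encoder
    tags = torch.randint(0, num_tags, (batch_size, seq_len))
    mask = torch.ones(batch_size, seq_len, dtype=torch.bool)

    log_likelihood = crf(logits, tags, mask)
    loss = -log_likelihood  # train by minimizing the negative log-likelihood
    best_paths = crf.viterbi_tags(logits, mask)  # [(tag_indices, viterbi_score), ...]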
Example #16
    def __init__(self,
                 config,
                 lm_model,
                 lm_config,
                 num_lbs=1,
                 mlt_trnsfmr=False,
                 task_params={},
                 binlb={},
                 binlbr={},
                 **kwargs):
        from . import reduction as R
        super(BaseClfHead, self).__init__()
        self.lm_model = lm_model
        self.lm_config = lm_config
        self.input_keys = config.input_keys
        self.maxlen = config.maxlen
        self.lm_loss = kwargs.setdefault(
            'lm_loss', config.lm_loss if hasattr(config, 'lm_loss') else True)
        self.lm_head = self.__lm_head__()
        self.num_lbs = num_lbs
        pdrop = kwargs.setdefault(
            'pdrop', config.pdrop if hasattr(config, 'pdrop') else 0.2)
        self.sample_weights = kwargs.setdefault(
            'sample_weights',
            config.sample_weights if hasattr(config, 'sample_weights') else False)
        self.mlt_trnsfmr = mlt_trnsfmr  # accept multiple streams of inputs, each of which will be input into the transformer
        self.task_type = kwargs.setdefault('task_type', config.task_type)
        self.task_params = task_params

        self.do_norm = kwargs.setdefault(
            'do_norm', config.do_norm if hasattr(config, 'do_norm') else False)
        self.do_extlin = kwargs.setdefault(
            'do_extlin',
            config.do_extlin if hasattr(config, 'do_extlin') else True)
        self.do_lastdrop = kwargs.setdefault(
            'do_lastdrop',
            config.do_lastdrop if hasattr(config, 'do_lastdrop') else True)
        self.dropout = nn.Dropout2d(
            pdrop) if self.task_type == 'nmt' else nn.Dropout(pdrop)
        self.last_dropout = nn.Dropout(pdrop) if self.do_lastdrop else None
        do_crf = kwargs.setdefault(
            'do_crf', config.do_crf if hasattr(config, 'do_crf') else False)
        self.crf = ConditionalRandomField(num_lbs) if do_crf else None
        constraints = kwargs.setdefault(
            'cnstrnts',
            config.cnstrnts.split(',')
            if hasattr(config, 'cnstrnts') and config.cnstrnts else [])
        self.constraints = [
            cnstrnt_cls(**cnstrnt_params)
            for cnstrnt_cls, cnstrnt_params in constraints
        ]
        do_thrshld = kwargs.setdefault(
            'do_thrshld',
            config.do_thrshld if hasattr(config, 'do_thrshld') else False)
        self.thrshlder = R.ThresholdEstimator(
            last_hdim=kwargs['last_hdim']
        ) if do_thrshld and 'last_hdim' in kwargs else None
        self.thrshld = kwargs.setdefault('thrshld', 0.5)

        # Customized function calling
        self.lm_logit = self._mlt_lm_logit if self.mlt_trnsfmr else self._lm_logit
        self.clf_h = self._clf_h
        self.dim_mulriple = 2 if self.mlt_trnsfmr and self.task_type in [
            'entlmnt', 'sentsim'
        ] and self.task_params.setdefault(
            'sentsim_func', None) is not None and self.task_params[
                'sentsim_func'] == 'concat' else 1  # two or one sentence
        if self.dim_mulriple > 1 and self.task_params.setdefault(
                'concat_strategy', 'normal') == 'diff':
            self.dim_mulriple = 4

        self.kwprop = {}
        self.binlb = binlb
        self.global_binlb = copy.deepcopy(binlb)
        self.binlbr = binlbr
        self.global_binlbr = copy.deepcopy(binlbr)
        for k, v in kwargs.items():
            setattr(self, k, v)
        self.mode = 'clf'
        self.debug = config.verbose if hasattr(config, 'verbose') else False
Example #17
    def __init__(self,
                 n_vocab,
                 unigram_embed_size,
                 rnn_unit_type,
                 rnn_bidirection,
                 rnn_batch_first,
                 rnn_n_layers,
                 rnn_hidden_size,
                 mlp_n_layers,
                 mlp_hidden_size,
                 n_labels,
                 use_crf=True,
                 crf_top_k=1,
                 embed_dropout=0.0,
                 rnn_dropout=0.0,
                 mlp_dropout=0.0,
                 pretrained_unigram_embed_size=0,
                 pretrained_embed_usage=ModelUsage.NONE):
        super(RNNTagger, self).__init__()
        self.n_vocab = n_vocab
        self.unigram_embed_size = unigram_embed_size

        self.rnn_unit_type = rnn_unit_type
        self.rnn_bidirection = rnn_bidirection
        self.rnn_batch_first = rnn_batch_first
        self.rnn_n_layers = rnn_n_layers
        self.rnn_hidden_size = rnn_hidden_size

        self.mlp_n_layers = mlp_n_layers
        self.mlp_hidden_size = mlp_hidden_size
        self.n_labels = n_labels
        self.use_crf = use_crf
        self.crf_top_k = crf_top_k

        self.embed_dropout = embed_dropout
        self.rnn_dropout = rnn_dropout
        self.mlp_dropout = mlp_dropout

        self.pretrained_unigram_embed_size = pretrained_unigram_embed_size
        self.pretrained_embed_usage = pretrained_embed_usage

        self.unigram_embed = None
        self.pretrained_unigram_embed = None
        self.rnn = None
        self.mlp = None
        self.crf = None
        self.cross_entropy_loss = None

        print('### Parameters', file=sys.stderr)

        # embeddings layer(s)

        print('# Embedding dropout ratio={}'.format(self.embed_dropout),
              file=sys.stderr)
        self.unigram_embed, self.pretrained_unigram_embed = models.util.construct_embeddings(
            n_vocab, unigram_embed_size, pretrained_unigram_embed_size,
            pretrained_embed_usage)
        if self.pretrained_embed_usage != ModelUsage.NONE:
            print('# Pretrained embedding usage: {}'.format(
                self.pretrained_embed_usage),
                  file=sys.stderr)
        print('# Unigram embedding matrix: W={}'.format(
            self.unigram_embed.weight.shape),
              file=sys.stderr)
        embed_size = self.unigram_embed.weight.shape[1]
        if self.pretrained_unigram_embed is not None:
            if self.pretrained_embed_usage == ModelUsage.CONCAT:
                embed_size += self.pretrained_unigram_embed_size
                print('# Pretrained unigram embedding matrix: W={}'.format(
                    self.pretrained_unigram_embed.weight.shape),
                      file=sys.stderr)

        # recurrent layers

        self.rnn_unit_type = rnn_unit_type
        self.rnn = models.util.construct_RNN(unit_type=rnn_unit_type,
                                             embed_size=embed_size,
                                             hidden_size=rnn_hidden_size,
                                             n_layers=rnn_n_layers,
                                             batch_first=rnn_batch_first,
                                             dropout=rnn_dropout,
                                             bidirectional=rnn_bidirection)
        rnn_output_size = rnn_hidden_size * (2 if rnn_bidirection else 1)

        # MLP

        print('# MLP', file=sys.stderr)
        mlp_in = rnn_output_size
        self.mlp = MLP(input_size=mlp_in,
                       hidden_size=mlp_hidden_size,
                       n_layers=mlp_n_layers,
                       output_size=n_labels,
                       dropout=mlp_dropout,
                       activation=nn.Identity)

        # Inference layer (CRF/softmax)

        if self.use_crf:
            self.crf = ConditionalRandomField(n_labels)
            print('# CRF cost: {}'.format(self.crf.transitions.shape),
                  file=sys.stderr)
        else:
            self.softmax_cross_entropy = nn.CrossEntropyLoss()
Example #18
    def __init__(self,
                 vocab: Vocabulary,
                 bert_embedder: Optional[PretrainedBertEmbedder] = None,
                 encoder: Optional[Seq2SeqEncoder] = None,
                 dropout: Optional[float] = None,
                 use_crf: bool = True,
                 add_random_noise: bool = False,
                 add_attack_noise: bool = False,
                 do_noise_normalization: bool = True,
                 noise_norm: Optional[float] = None,
                 noise_loss_prob: Optional[float] = None,
                 add_noise_for: str = "ov",
                 rnn_after_embeddings: bool = False,
                 open_vocabulary_slots: Optional[List[str]] = None,
                 metrics_for_each_slot_type: bool = False) -> None:
        """
        Params
        ------
        vocab: the allennlp Vocabulary object, will be automatically passed
        bert_embedder: the pretrained BERT embedder. If it is not None, the pretrained BERT
                embedding (parameter fixed) will be used as the embedding layer. Otherwise, a look-up
                embedding matrix will be initialized with the embedding size 1024. The default is None.
        encoder: the contextual encoder used after the embedding layer. If set to None, no contextual
                encoder will be used.
        dropout: the dropout rate. Not set in any of our experiments.
        use_crf: if set to True, CRF will be used at the end of the model (as output layer). Otherwise,
                a softmax layer (with cross-entropy loss) will be used.
        add_random_noise: whether to add random noise to slots. Cannot be set simultaneously
                with add_attack_noise. This setting is used as a baseline in our experiments.
        add_attack_noise: whether to add adversarial attack noise to slots. Cannot be set simultaneously
                with add_random_noise.
        do_noise_normalization: if set to True, the normalization will be applied to gradients w.r.t. 
                token embeddings. Otherwise, the gradients won't be normalized.
        noise_norm: the normalization norm (L2) applied to gradients.
        noise_loss_prob: the alpha hyperparameter to balance the loss from normal forward and adversarial
                forward. See the paper for more details. Should be set from 0 to 1.
        add_noise_for: if set to ov, the noise will only be applied to open-vocabulary slots. Otherwise,
                the noise will be applied to all slots (both open-vocabulary and normal slots).
        rnn_after_embeddings: if set to True, an additional BiLSTM layer will be applied after the embedding
                layer. Default is False.
        open_vocabulary_slots: the list of open-vocabulary slots. If not set, will be set to open-vocabulary
                slots of Snips dataset by default.
        metrics_for_each_slot_type: whether to log metrics for each slot type. Default is False.
        """
        super().__init__(vocab)

        if bert_embedder:
            self.use_bert = True
            self.bert_embedder = bert_embedder
        else:
            self.use_bert = False
            self.basic_embedder = BasicTextFieldEmbedder({
                "tokens":
                Embedding(vocab.get_vocab_size(namespace="tokens"), 1024)
            })
            self.rnn_after_embeddings = rnn_after_embeddings
            if rnn_after_embeddings:
                self.rnn = Seq2SeqEncoder.from_params(
                    Params({
                        "type": "lstm",
                        "input_size": 1024,
                        "hidden_size": 512,
                        "bidirectional": True,
                        "batch_first": True
                    }))

        self.encoder = encoder

        if encoder:
            hidden2tag_in_dim = encoder.get_output_dim()
        else:
            hidden2tag_in_dim = bert_embedder.get_output_dim()
        self.hidden2tag = TimeDistributed(
            torch.nn.Linear(in_features=hidden2tag_in_dim,
                            out_features=vocab.get_vocab_size("labels")))

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None

        self.use_crf = use_crf
        if use_crf:
            crf_constraints = allowed_transitions(
                constraint_type="BIO",
                labels=vocab.get_index_to_token_vocabulary("labels"))
            self.crf = ConditionalRandomField(
                num_tags=vocab.get_vocab_size("labels"),
                constraints=crf_constraints,
                include_start_end_transitions=True)

        # default open_vocabulary slots: for SNIPS dataset
        open_vocabulary_slots = open_vocabulary_slots or [
            "playlist", "entity_name", "poi", "restaurant_name",
            "geographic_poi", "album", "track", "object_name", "movie_name"
        ]
        self.f1 = OVSpecSpanBasedF1Measure(
            vocab,
            tag_namespace="labels",
            ignore_classes=[],
            label_encoding="BIO",
            open_vocabulary_slots=open_vocabulary_slots)

        self.add_random_noise = add_random_noise
        self.add_attack_noise = add_attack_noise
        assert not (add_random_noise and
                    add_attack_noise), "both random and attack noise applied"
        if add_random_noise or add_attack_noise:
            self.do_noise_normalization = do_noise_normalization
            assert noise_norm is not None
            assert noise_loss_prob is not None and 0. <= noise_loss_prob <= 1.
            self.noise_norm = noise_norm
            self.noise_loss_prob = noise_loss_prob
            assert add_noise_for in ["ov", "all"]
            self.ov_noise_only = (add_noise_for == "ov")

        self.metrics_for_each_slot_type = metrics_for_each_slot_type
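
The docstring above describes the adversarial branch: take the gradient of the loss with respect to the token embeddings, normalize it, scale it to noise_norm, and blend the clean and noisy losses with noise_loss_prob. A minimal sketch of that step, assuming embeddings requires grad and a hypothetical compute_loss callable that runs the rest of the tagger (not the authors' implementation):

    def adversarial_step(embeddings, compute_loss, noise_norm, noise_loss_prob):
        clean_loss = compute_loss(embeddings)
        grad, = torch.autograd.grad(clean_loss, embeddings, retain_graph=True)
        # L2-normalize the gradient per token, then scale it to the requested norm
        unit = grad / (grad.norm(dim=-1, keepdim=True) + 1e-12)
        noisy_loss = compute_loss(embeddings + noise_norm * unit)
        # Alpha-blend the adversarial and clean objectives
        return noise_loss_prob * noisy_loss + (1.0 - noise_loss_prob) * clean_loss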
Example #19
 def __init__(self,
              config,
              lm_model,
              lm_config,
              embed_type='w2v',
              w2v_path=None,
              iactvtn='relu',
              oactvtn='sigmoid',
              fchdim=0,
              extfc=False,
              sample_weights=False,
              num_lbs=1,
              mlt_trnsfmr=False,
              lm_loss=False,
              do_drop=True,
              pdrop=0.2,
              do_norm=True,
              norm_type='batch',
              do_lastdrop=True,
              do_crf=False,
              do_thrshld=False,
              constraints=[],
              initln=False,
              initln_mean=0.,
              initln_std=0.02,
              task_params={},
              **kwargs):
     from util import config as C
     super(EmbeddingClfHead, self).__init__(
         config,
         lm_model,
         lm_config,
         sample_weights=sample_weights,
         num_lbs=num_lbs,
         mlt_trnsfmr=config.task_type in ['entlmnt', 'sentsim']
         and task_params.setdefault('sentsim_func', None) is not None,
         task_params=task_params,
         **kwargs)
     self.dim_mulriple = 2 if self.task_type in ['entlmnt', 'sentsim'] and (
         self.task_params.setdefault('sentsim_func', None) is None
         or self.task_params['sentsim_func'] == 'concat') else 1
     self.embed_type = embed_type
     if embed_type.startswith('w2v'):
         from gensim.models import KeyedVectors
         from gensim.models.keyedvectors import Word2VecKeyedVectors
         self.w2v_model = w2v_path if type(
             w2v_path) is Word2VecKeyedVectors else (
                 KeyedVectors.load(w2v_path, mmap='r')
                 if w2v_path and os.path.isfile(w2v_path) else None)
         assert (self.w2v_model)
         self.n_embd = self.w2v_model.syn0.shape[1] + (
             self.n_embd if hasattr(self, 'n_embd') else 0)
         config.register_callback(
             'mdl_trsfm', EmbeddingClfHead.callback_update_w2v_model(self))
     elif embed_type.startswith('elmo'):
         self.vocab_size = 793471
         self.n_embd = lm_config['elmoedim'] * 2 + (
             self.n_embd if hasattr(self, 'n_embd') else 0
         )  # two ELMo layer * ELMo embedding dimensions
         config.register_callback(
             'mdl_trsfm',
             EmbeddingClfHead.callback_update_elmo_config(self))
     elif embed_type.startswith('elmo_w2v'):
         from gensim.models import KeyedVectors
         from gensim.models.keyedvectors import Word2VecKeyedVectors
         self.w2v_model = w2v_path if type(
             w2v_path) is Word2VecKeyedVectors else (
                 KeyedVectors.load(w2v_path, mmap='r')
                 if w2v_path and os.path.isfile(w2v_path) else None)
         assert (self.w2v_model)
         self.vocab_size = 793471
         self.n_embd = self.w2v_model.syn0.shape[
             1] + lm_config['elmoedim'] * 2 + (self.n_embd if hasattr(
                 self, 'n_embd') else 0)
         config.register_callback(
             'mdl_trsfm', EmbeddingClfHead.callback_update_w2v_model(self))
         config.register_callback(
             'mdl_trsfm',
             EmbeddingClfHead.callback_update_elmo_config(self))
     self.norm = C.NORM_TYPE_MAP[norm_type](
         self.maxlen
     ) if self.task_type == 'nmt' else C.NORM_TYPE_MAP[norm_type](
         self.n_embd)
     self._int_actvtn = C.ACTVTN_MAP[iactvtn]
     self._out_actvtn = C.ACTVTN_MAP[oactvtn]
     self.fchdim = fchdim
     self.extfc = extfc
     self.hdim = self.dim_mulriple * self.n_embd if self.mlt_trnsfmr and self.task_type in [
         'entlmnt', 'sentsim'
     ] else self.n_embd
     self.linear = self.__init_linear__()
     if (initln):
         self.linear.apply(H._weights_init(mean=initln_mean,
                                           std=initln_std))
     if self.do_extlin:
         self.extlinear = nn.Linear(self.n_embd, self.n_embd)
         if (initln):
             self.extlinear.apply(
                 H._weights_init(mean=initln_mean, std=initln_std))
     self.crf = ConditionalRandomField(num_lbs) if do_crf else None