def __init__(self,
             vocab,
             encoder: FeedForward,
             mean_projection: FeedForward,
             log_variance_projection: FeedForward,
             decoder: FeedForward,
             kld_clamp: Optional[float] = None,
             z_dropout: float = 0.2) -> None:
    """Wire up the components of a logistic-normal latent-variable model.

    ``mean_projection`` and ``log_variance_projection`` map the encoded input
    to the latent Gaussian parameters; ``kld_clamp``, when given, is stored for
    use as a KL-divergence cap; ``z_dropout`` is applied via ``_z_dropout``.
    """
    super(LogisticNormal, self).__init__(vocab)
    self.encoder = encoder
    self.mean_projection = mean_projection
    self.log_variance_projection = log_variance_projection
    self._kld_clamp = kld_clamp
    # The `decoder` FeedForward is used only for its dimensions here: the
    # actual decoders are two bias-free linear layers with those dims.
    self._decoder = torch.nn.Linear(decoder.get_input_dim(), decoder.get_output_dim(),
                                    bias=False)
    self._decoder_rationale = torch.nn.Linear(decoder.get_input_dim(),
                                              decoder.get_output_dim(), bias=False)
    self._z_dropout = torch.nn.Dropout(z_dropout)
    # Hyper-parameters for the product-key memory; the memory itself is
    # currently disabled (see the commented-out line below).
    mem_params = AttrDict({
        "sparse": False,
        "k_dim": 128,
        "heads": 4,
        "knn": 32,
        "n_keys": 512,  # the memory will have (n_keys ** 2) values
        "query_batchnorm": True,
        "input_dropout": 0,
        "query_dropout": 0,
        "value_dropout": 0,
    })
    # self.memory = HashingMemory(encoder.get_output_dim(), decoder.get_input_dim(), mem_params)
    self.latent_dim = mean_projection.get_output_dim()
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Set up the attend / compare / aggregate stages and validate dimensions."""
    super(DecomposableAttention, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    # Attend and compare run independently per token, hence TimeDistributed.
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = MatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    # When no hypothesis encoder is given, reuse the premise encoder.
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder
    self._num_labels = vocab.get_vocab_size(namespace="labels")

    embedding_dim = text_field_embedder.get_output_dim()
    attend_input_dim = attend_feedforward.get_input_dim()
    if embedding_dim != attend_input_dim:
        raise ConfigurationError("Output dimension of the text_field_embedder (dim: {}), "
                                 "must match the input_dim of the FeedForward layer "
                                 "attend_feedforward, (dim: {}). ".format(embedding_dim,
                                                                          attend_input_dim))
    final_dim = aggregate_feedforward.get_output_dim()
    if final_dim != self._num_labels:
        raise ConfigurationError("Final output dimension (%d) must equal num labels (%d)"
                                 % (final_dim, self._num_labels))

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             context_layer: Seq2SeqEncoder,
             mention_feedforward: FeedForward,
             antecedent_feedforward: FeedForward,
             feature_size: int,
             max_span_width: int,
             spans_per_word: float,
             max_antecedents: int,
             lexical_dropout: float = 0.2,
             context_layer_back: Seq2SeqEncoder = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Coreference model setup: mention scoring/pruning, antecedent scoring,
    span extractors, and feature embeddings (distance, speaker, genre).

    ``context_layer_back`` is an optional second context encoder — see the
    TODO below about output dims when both layers are used.
    """
    super(CoreferenceResolver, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    self._context_layer_back = context_layer_back
    self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)
    # Mention scorer: per-span feedforward followed by a scalar linear layer.
    feedforward_scorer = torch.nn.Sequential(
        TimeDistributed(mention_feedforward),
        TimeDistributed(
            torch.nn.Linear(mention_feedforward.get_output_dim(), 1)))
    self._mention_pruner = SpanPruner(feedforward_scorer)
    self._antecedent_scorer = TimeDistributed(
        torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1))
    # TODO check the output dim when two context layers are passed through
    self._endpoint_span_extractor = EndpointSpanExtractor(
        context_layer.get_output_dim(),
        combination="x,y",
        num_width_embeddings=max_span_width,
        span_width_embedding_dim=feature_size,
        bucket_widths=False)
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(
        input_dim=text_field_embedder.get_output_dim())
    # 10 possible distance buckets.
    self._num_distance_buckets = 10
    self._distance_embedding = Embedding(self._num_distance_buckets, feature_size)
    # Binary (2-way) speaker feature embedding.
    self._speaker_embedding = Embedding(2, feature_size)
    # Genre codes mapped to embedding indices (presumably OntoNotes genres).
    self.genres = {
        g: i
        for i, g in enumerate(['bc', 'bn', 'mz', 'nw', 'pt', 'tc', 'wb'])
    }
    self._genre_embedding = Embedding(len(self.genres), feature_size)
    self._max_span_width = max_span_width
    self._spans_per_word = spans_per_word
    self._max_antecedents = max_antecedents
    self._mention_recall = MentionRecall()
    self._conll_coref_scores = ConllCorefScores()
    if lexical_dropout > 0:
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
    else:
        # Identity function when dropout is disabled.
        self._lexical_dropout = lambda x: x
    # NOTE(review): feature dropout rate is hard-coded at 0.2.
    self._feature_dropout = torch.nn.Dropout(0.2)
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             aggregate_feedforward: FeedForward,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             aggregate_premise: Optional[str] = "max",
             aggregate_hypothesis: Optional[str] = "max",
             embeddings_dropout_value: Optional[float] = 0.0,
             share_encoders: Optional[bool] = False) -> None:
    """Premise/hypothesis aggregation model: optional encoders, aggregation
    modes, and a final feedforward over 4x the encoded dimension.

    NOTE(review): ``share_encoders`` is accepted but not referenced in this
    constructor — presumably consumed elsewhere or dead; confirm.
    """
    super(StackedNNAggregateCustom, self).__init__(vocab)
    self._text_field_embedder = text_field_embedder
    if embeddings_dropout_value > 0.0:
        self._embeddings_dropout = torch.nn.Dropout(p=embeddings_dropout_value)
    else:
        # Identity function when dropout is disabled.
        self._embeddings_dropout = lambda x: x
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    self._hypothesis_encoder = hypothesis_encoder
    self._premise_aggregate = aggregate_premise
    self._hypothesis_aggregate = aggregate_hypothesis
    self._num_labels = vocab.get_vocab_size(namespace="labels")
    # Without an encoder, the embedder's output dim is used directly.
    premise_output_dim = self._text_field_embedder.get_output_dim()
    if self._premise_encoder is not None:
        premise_output_dim = self._premise_encoder.get_output_dim()
    hypothesis_output_dim = self._text_field_embedder.get_output_dim()
    if self._hypothesis_encoder is not None:
        hypothesis_output_dim = self._hypothesis_encoder.get_output_dim()
    if premise_output_dim != hypothesis_output_dim:
        raise ConfigurationError("Output dimension of the premise_encoder (dim: {}), "
                                 "plus hypothesis_encoder (dim: {})"
                                 "must match! "
                                 .format(premise_output_dim, hypothesis_output_dim))
    # The aggregate feedforward consumes 4 x the shared output dimension.
    if premise_output_dim * 4 != \
            aggregate_feedforward.get_input_dim():
        raise ConfigurationError("The output of aggregate_feedforward input dim ({2}) "
                                 "should be {3} = 4 x {0} ({1} = premise_output_dim == hypothesis_output_dim)!"
                                 .format(premise_output_dim, hypothesis_output_dim,
                                         aggregate_feedforward.get_input_dim(),
                                         4 * premise_output_dim))
    if aggregate_feedforward.get_output_dim() != self._num_labels:
        raise ConfigurationError("Final output dimension (%d) must equal num labels (%d)"
                                 % (aggregate_feedforward.get_output_dim(), self._num_labels))
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary, mention_feedforward: FeedForward, relation_feedforward: FeedForward, feature_size: int, spans_per_word: float, span_emb_dim: int, rel_prop: int = 0, rel_prop_dropout_A: float = 0.0, rel_prop_dropout_f: float = 0.0, initializer: InitializerApplicator = InitializerApplicator(), positive_label_weight: float = 1.0, regularizer: Optional[RegularizerApplicator] = None) -> None: super(RelationExtractor1, self).__init__(vocab, regularizer) # Need to hack this for cases where there's no relation data. It breaks Ulme's code. self._n_labels = max(vocab.get_vocab_size("relation_labels"), 1) # Span candidate scorer. # TODO(dwadden) make sure I've got the input dim right on this one. feedforward_scorer = torch.nn.Sequential( TimeDistributed(mention_feedforward), TimeDistributed( torch.nn.Linear(mention_feedforward.get_output_dim(), 1))) self._mention_pruner = Pruner(feedforward_scorer) # Relation scorer. self._relation_feedforward = relation_feedforward self._relation_scorer = torch.nn.Linear( relation_feedforward.get_output_dim(), self._n_labels) self._spans_per_word = spans_per_word # TODO(dwadden) Add code to compute relation F1. # self._candidate_recall = CandidateRecall() self._relation_metrics = RelationMetrics1() class_weights = torch.cat([ torch.tensor([1.0]), positive_label_weight * torch.ones(self._n_labels) ]) self._loss = torch.nn.CrossEntropyLoss(reduction="sum", ignore_index=-1, weight=class_weights) self.rel_prop = rel_prop # Relation Propagation self._A_network = FeedForward(input_dim=self._n_labels, num_layers=1, hidden_dims=span_emb_dim, activations=lambda x: x, dropout=rel_prop_dropout_A) self._f_network = FeedForward(input_dim=2 * span_emb_dim, num_layers=1, hidden_dims=span_emb_dim, activations=torch.nn.Sigmoid(), dropout=rel_prop_dropout_f) initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    context_layer: Seq2SeqEncoder,
    mention_feedforward: FeedForward,
    antecedent_feedforward: FeedForward,
    feature_size: int,
    max_span_width: int,
    spans_per_word: float,
    max_antecedents: int,
    lexical_dropout: float = 0.2,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    """End-to-end coreference model: mention scorer/pruner, antecedent scorer,
    endpoint + attentive span extractors, and distance features."""
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)
    # Mention scorer: per-span feedforward followed by a scalar linear layer.
    feedforward_scorer = torch.nn.Sequential(
        TimeDistributed(mention_feedforward),
        TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)),
    )
    self._mention_pruner = Pruner(feedforward_scorer)
    self._antecedent_scorer = TimeDistributed(
        torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1)
    )
    # Span representation: boundary states ("x,y") plus a width embedding.
    self._endpoint_span_extractor = EndpointSpanExtractor(
        context_layer.get_output_dim(),
        combination="x,y",
        num_width_embeddings=max_span_width,
        span_width_embedding_dim=feature_size,
        bucket_widths=False,
    )
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(
        input_dim=text_field_embedder.get_output_dim()
    )
    # 10 possible distance buckets.
    self._num_distance_buckets = 10
    self._distance_embedding = Embedding(self._num_distance_buckets, feature_size)
    self._max_span_width = max_span_width
    self._spans_per_word = spans_per_word
    self._max_antecedents = max_antecedents
    self._mention_recall = MentionRecall()
    self._conll_coref_scores = ConllCorefScores()
    if lexical_dropout > 0:
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
    else:
        # Identity function when dropout is disabled.
        self._lexical_dropout = lambda x: x
    initializer(self)
def __init__(self, vocab: Vocabulary,
             mention_feedforward: FeedForward,
             relation_feedforward: FeedForward,
             spans_per_word: float,
             span_emb_dim: int,
             use_biaffine_rel: bool,
             rel_prop: int = 0,
             rel_prop_dropout_A: float = 0.0,
             rel_prop_dropout_f: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             positive_label_weight: float = 1.0,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Relation extraction head that scores span pairs either with a biaffine
    map per label or with a feedforward + linear scorer."""
    super(RelationExtractor, self).__init__(vocab, regularizer)
    # Use at least one label even when the vocab has no relation data.
    self._n_labels = max(vocab.get_vocab_size("relation_labels"), 1)
    # Span candidate scorer: per-span feedforward + scalar linear layer.
    feedforward_scorer = torch.nn.Sequential(
        TimeDistributed(mention_feedforward),
        TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)))
    self._mention_pruner = Pruner(feedforward_scorer)
    # Relation scorer.
    self._use_biaffine_rel = use_biaffine_rel
    if self._use_biaffine_rel:
        # One linear (bilinear-style) map per relation label.
        self._biaffine = torch.nn.ModuleList()
        for _ in range(self._n_labels):
            self._biaffine.append(torch.nn.Linear(span_emb_dim, span_emb_dim))
    else:
        self._relation_feedforward = relation_feedforward
        self._relation_scorer = torch.nn.Linear(relation_feedforward.get_output_dim(),
                                                self._n_labels)
    self._spans_per_word = spans_per_word
    self._relation_metrics = RelationMetrics1()
    # Weight 1.0 for index 0 (presumably the null relation), with the real
    # labels scaled by `positive_label_weight`.
    class_weights = torch.cat([torch.tensor([1.0]),
                               positive_label_weight * torch.ones(self._n_labels)])
    self._loss = torch.nn.CrossEntropyLoss(reduction="sum", ignore_index=-1,
                                           weight=class_weights)
    self.rel_prop = rel_prop
    # Relation Propagation
    self._A_network = FeedForward(input_dim=self._n_labels,
                                  num_layers=1,
                                  hidden_dims=span_emb_dim,
                                  activations=lambda x: x,
                                  dropout=rel_prop_dropout_A)
    self._f_network = FeedForward(input_dim=2*span_emb_dim,
                                  num_layers=1,
                                  hidden_dims=span_emb_dim,
                                  activations=torch.nn.Sigmoid(),
                                  dropout=rel_prop_dropout_f)
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             context_layer: Seq2SeqEncoder,
             mention_feedforward: FeedForward,
             antecedent_feedforward: FeedForward,
             feature_size: int,
             max_span_width: int,
             spans_per_word: float,
             max_antecedents: int,
             lexical_dropout: float = 0.2,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Coreference resolver variant that delegates full setup to the parent
    class and then replaces the mention pruner with a ``Pruner``."""
    super(ALCoreferenceResolver, self).__init__(
        vocab,
        text_field_embedder,
        context_layer,
        mention_feedforward,
        antecedent_feedforward,
        feature_size,
        max_span_width,
        spans_per_word,
        max_antecedents,
        lexical_dropout,
        initializer,
        regularizer,
    )
    # NOTE(review): this rebuilds the mention scorer the parent already
    # created and then runs the initializer over all parameters a second
    # time — presumably intentional (to swap the pruner class); confirm.
    feedforward_scorer = torch.nn.Sequential(
        TimeDistributed(mention_feedforward),
        TimeDistributed(
            torch.nn.Linear(mention_feedforward.get_output_dim(), 1)))
    self._mention_pruner = Pruner(feedforward_scorer)
    initializer(self)
def __init__(self,
             encoder_output_dim: int,
             action_embedding_dim: int,
             input_attention: Attention,
             num_start_types: int,
             num_entity_types: int,
             mixture_feedforward: FeedForward = None,
             dropout: float = 0.0,
             unlinked_terminal_indices: List[int] = None) -> None:
    """Single decoding step for the WikiTables parser: projections around an
    LSTM cell, start-type prediction, and optional coverage/mixture modules.

    Passing ``unlinked_terminal_indices`` enables coverage-based training.
    """
    super(WikiTablesDecoderStep, self).__init__()
    self._mixture_feedforward = mixture_feedforward
    self._entity_type_embedding = Embedding(num_entity_types, action_embedding_dim)
    self._input_attention = input_attention
    self._num_start_types = num_start_types
    self._start_type_predictor = Linear(encoder_output_dim, num_start_types)
    # Decoder output dim needs to be the same as the encoder output dim since we initialize the
    # hidden state of the decoder with the final hidden state of the encoder.
    output_dim = encoder_output_dim
    input_dim = output_dim
    # Our decoder input will be the concatenation of the decoder hidden state and the previous
    # action embedding, and we'll project that down to the decoder's `input_dim`, which we
    # arbitrarily set to be the same as `output_dim`.
    self._input_projection_layer = Linear(
        output_dim + action_embedding_dim, input_dim)
    # Before making a prediction, we'll compute an attention over the input given our updated
    # hidden state. Then we concatenate those with the decoder state and project to
    # `action_embedding_dim` to make a prediction.
    self._output_projection_layer = Linear(output_dim + encoder_output_dim,
                                           action_embedding_dim)
    if unlinked_terminal_indices is not None:
        # This means we are using coverage to train the parser.
        # These factors are used to add the embeddings of yet to be produced actions to the
        # predicted embedding, and to boost the action logits of yet to be produced linked
        # actions, respectively.
        self._unlinked_checklist_multiplier = Parameter(
            torch.FloatTensor([1.0]))
        self._linked_checklist_multiplier = Parameter(
            torch.FloatTensor([1.0]))
    self._unlinked_terminal_indices = unlinked_terminal_indices
    # TODO(pradeep): Do not hardcode decoder cell type.
    self._decoder_cell = LSTMCell(input_dim, output_dim)
    # The mixture network, when present, must map the hidden state to a scalar.
    if mixture_feedforward is not None:
        check_dimensions_match(output_dim, mixture_feedforward.get_input_dim(),
                               "hidden state embedding dim", "mixture feedforward input dim")
        check_dimensions_match(mixture_feedforward.get_output_dim(), 1,
                               "mixture feedforward output dim", "dimension for scalar value")
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        # Identity function when dropout is disabled.
        self._dropout = lambda x: x
def __init__(self,
             encoder_output_dim: int,
             action_embedding_dim: int,
             input_attention: Attention,
             activation: Activation = Activation.by_name('relu')(),
             predict_start_type_separately: bool = True,
             num_start_types: int = None,
             add_action_bias: bool = True,
             mixture_feedforward: FeedForward = None,
             dropout: float = 0.0) -> None:
    """Decoder step with a learned linked-checklist multiplier and an optional
    mixture feedforward whose dimensions are validated here."""
    # All of the shared decoder-step wiring lives in the parent class.
    super().__init__(encoder_output_dim=encoder_output_dim,
                     action_embedding_dim=action_embedding_dim,
                     input_attention=input_attention,
                     num_start_types=num_start_types,
                     activation=activation,
                     predict_start_type_separately=predict_start_type_separately,
                     add_action_bias=add_action_bias,
                     dropout=dropout)
    # Learned scale applied to checklist balances for linked actions.
    self._linked_checklist_multiplier = Parameter(torch.FloatTensor([1.0]))
    self._mixture_feedforward = mixture_feedforward
    if mixture_feedforward is None:
        return
    # The mixture network must map the hidden state down to a single scalar.
    check_dimensions_match(encoder_output_dim, mixture_feedforward.get_input_dim(),
                           "hidden state embedding dim", "mixture feedforward input dim")
    check_dimensions_match(mixture_feedforward.get_output_dim(), 1,
                           "mixture feedforward output dim", "dimension for scalar value")
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             classifier_feedforward: FeedForward,
             context_encoder: Optional[Seq2SeqEncoder] = None,
             response_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Attend / compare / aggregate coherence classifier over context-response
    pairs, with dimension checks between the embedder and the feedforwards.

    Fix: the previous version eagerly computed ``pos_label_index`` by
    searching the label vocabulary for the string 'neg' (despite the "pos"
    name) and never used it — the F1 metric that needed it is commented out.
    That lookup raised ValueError whenever 'neg' was absent from the label
    vocabulary, so the dead computation has been removed.
    """
    super(DialogueContextCoherenceAttentionClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = vocab.get_vocab_size("labels")
    self.context_encoder = context_encoder
    self.response_encoder = response_encoder
    # Attend and compare run per token, hence TimeDistributed.
    self.attend_feedforward = TimeDistributed(attend_feedforward)
    self.matrix_attention = MatrixAttention(similarity_function)
    self.compare_feedforward = TimeDistributed(compare_feedforward)
    self.classifier_feedforward = classifier_feedforward
    check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                           "text field embedding dim", "attend feedforward input dim")
    check_dimensions_match(classifier_feedforward.get_output_dim(), self.num_classes,
                           "final output dimension", "number of labels")
    self.metrics = {
        "accuracy": CategoricalAccuracy()
        # To re-enable F1, resolve the positive label index lazily, e.g.:
        # "f1": F1Measure(positive_label=vocab.get_token_index('neg', 'labels'))
    }
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, vocab,
             feature_size: int,
             max_span_width: int,
             keep_rate: int,
             mlp_dropout: float = 0.4,
             embedder_type=None,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Span pre-pruner: scores candidate spans from pooled span representations
    with a feedforward + linear scorer, trained with binary cross-entropy.

    ``keep_rate`` is annotated ``int`` but presumably a keep fraction —
    TODO confirm against callers.
    """
    super(PrePruner, self).__init__(vocab, regularizer)
    self.keep_rate = keep_rate
    self.embedder = get_embeddings(embedder_type, self.vocab)
    # NOTE(review): hard-coded 300-dim feedforward with 0.5 dropout.
    self.ffn = FeedForward(300, 2, 300, F.relu, 0.5)
    embedding_dim = self.embedder.get_output_dim()
    self._span_extractor = PoolingSpanExtractor(
        embedding_dim,
        num_width_embeddings=max_span_width,
        span_width_embedding_dim=feature_size,
        bucket_widths=False)
    entity_feedforward = FeedForward(self._span_extractor.get_output_dim(), 2, 150,
                                     F.relu, mlp_dropout)
    # Span scorer: per-span feedforward followed by a scalar linear layer.
    self.feedforward_scorer = torch.nn.Sequential(
        TimeDistributed(entity_feedforward),
        TimeDistributed(
            torch.nn.Linear(entity_feedforward.get_output_dim(), 1)),
    )
    # NOTE(review): lexical dropout rate is hard-coded at 0.1.
    self._lexical_dropout = torch.nn.Dropout(p=0.1)
    # BCELoss expects probabilities in [0, 1] — presumably a sigmoid is
    # applied in forward(); confirm.
    self.loss = torch.nn.BCELoss()
    self._metric_f1 = FBetaMeasure()
def __init__(
    self,
    vocab: Vocabulary,
    mention_feedforward: FeedForward,
    label_namespace: str,
    n_features: int = 0,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
    document_embedding: torch.nn.Embedding = None,
    doc_to_idx_mapping: dict = None,
    graph_embedding_dim: int = None,
) -> None:
    """Span classifier scoring each span with a feedforward plus extra
    features (and optionally graph embeddings), thresholded via binary F1."""
    super(SpanClassifier, self).__init__(vocab, regularizer)
    self._label_namespace = label_namespace
    self._mention_feedforward = TimeDistributed(mention_feedforward)
    # Graph embeddings are optional; when enabled their dimension is added to
    # the extra-features width consumed by the final scoring layer.
    self._use_graph_embeddings = graph_embedding_dim is not None
    features_dim = n_features + graph_embedding_dim if self._use_graph_embeddings else n_features
    self._ner_scorer = TimeDistributed(
        torch.nn.Linear(
            mention_feedforward.get_output_dim() + features_dim, 1))
    self._ner_metrics = BinaryThresholdF1()
    # Lookup table and mapping from documents to embedding rows (both optional).
    self._document_embedding = document_embedding
    self._doc_to_idx_mapping = doc_to_idx_mapping
    initializer(self)
def __init__(self,
             text_field_embedder: TextFieldEmbedder,
             # frozen_embeddings: bool = True,
             mapper: FeedForward = None,
             bias: bool = True,
             pre_normalization: Normalization = None,
             post_normalization: Normalization = None,
             normalization: Normalization = None):
    """Wrap a text-field embedder with optional pre/post normalization and a
    mapper layer.

    ``normalization``, when given, overrides ``pre_normalization``. Without a
    ``mapper``, a bias-only layer (or a bare ``Module``) is used and the
    output dim stays the embedder's.
    """
    super().__init__()
    self._bias = bias
    self._text_field_embedder = text_field_embedder
    self._output_dim = self._text_field_embedder.get_output_dim()
    # `normalization` takes precedence over `pre_normalization`.
    if normalization is not None:
        pre_normalization = normalization
    self._pre_normalization = pre_normalization
    self._post_normalization = post_normalization
    # self._frozen_embeddings = frozen_embeddings
    # if self._frozen_embeddings:
    #     self._text_field_embedder.requires_grad_(False)
    # TODO Make sure mapper supports time-distributed out-of-the-box.
    if mapper is not None:
        self._mapper = mapper
        # The mapper changes the reported output dimension.
        self._output_dim = mapper.get_output_dim()
    else:
        if bias:
            self._mapper = BiasOnly(self._output_dim)
        else:
            # Plain Module acts as a placeholder identity mapper.
            self._mapper = Module()
def __init__(self, vocab: Vocabulary,
             bert: TextFieldEmbedder,
             classifier: FeedForward,
             dropout: float = 0.1,
             num_labels: int = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """BERT-based classifier whose feedforward consumes twice the BERT output
    dimension (presumably two concatenated BERT vectors — confirm in forward)."""
    super().__init__(vocab, regularizer)
    self._bert = bert
    # NOTE(review): two parallel dropout attributes are created here —
    # `self._dropout` (always a Dropout) and `self.dropout` (Dropout or None).
    # Presumably only one is read in forward(); confirm and remove the other.
    self._dropout = torch.nn.Dropout(dropout)
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self._classifier = classifier
    # Fall back to the label vocabulary size when num_labels isn't given.
    if num_labels is None:
        self._num_labels = vocab.get_vocab_size(namespace="labels")
    else:
        self._num_labels = num_labels
    check_dimensions_match(bert.get_output_dim() * 2, classifier.get_input_dim(),
                           "bert output dim", "classifier input dim")
    check_dimensions_match(classifier.get_output_dim(), self._num_labels,
                           "classifier output dim", "number of labels")
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Decomposable-attention model using the legacy matrix attention;
    validates embedder/feedforward and output/label dimensions."""
    super(DecomposableAttention, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    # Per-token stages are time-distributed.
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    # Fall back to the premise encoder when none is given for the hypothesis.
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder
    self._num_labels = vocab.get_vocab_size(namespace="labels")

    embedding_dim = text_field_embedder.get_output_dim()
    check_dimensions_match(embedding_dim, attend_feedforward.get_input_dim(),
                           "text field embedding dim", "attend feedforward input dim")
    check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels,
                           "final output dimension", "number of labels")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             bert: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             classifier: FeedForward,
             aggregation: str = 'cls+max',
             dropout: float = 0.1,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """BERT + Seq2Seq-encoder classifier with a tanh pooler and a configurable
    aggregation of encoder states."""
    super().__init__(vocab, regularizer)
    self._bert = bert
    self._encoder = encoder
    # How encoder states are aggregated into a fixed vector (default 'cls+max').
    self._aggregation = aggregation
    self._dropout = torch.nn.Dropout(dropout)
    self._classifier = classifier
    self._num_labels = vocab.get_vocab_size(namespace="labels")
    # Single tanh layer over the BERT output dimension.
    self._pooler = FeedForward(input_dim=bert.get_output_dim(),
                               num_layers=1,
                               hidden_dims=bert.get_output_dim(),
                               activations=torch.tanh)
    check_dimensions_match(bert.get_output_dim(), encoder.get_input_dim(),
                           "bert output dim", "encoder input dim")
    # The classifier consumes 2 x the encoder output — presumably two
    # concatenated aggregates per the 'cls+max' default; confirm in forward().
    check_dimensions_match(encoder.get_output_dim() * 2, classifier.get_input_dim(),
                           "encoder output dim", "classifier input dim")
    check_dimensions_match(classifier.get_output_dim(), self._num_labels,
                           "classifier output dim", "number of labels")
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             class_weights: List[float] = (1.0, 1.0),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Text classifier with accuracy and F1 metrics.

    Fix: the output-dimension check previously used ``assert``, which is
    stripped under ``python -O`` and would let a mis-sized classifier through
    silently; it now raises explicitly.

    NOTE(review): ``class_weights`` is accepted but not applied — the
    weighted loss is commented out below; confirm whether it should be wired
    into ``CrossEntropyLoss(weight=...)``.
    """
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self.classifier_feedforward = classifier_feedforward
    self.num_classes = self.vocab.get_vocab_size('labels')
    if self.num_classes != classifier_feedforward.get_output_dim():
        raise ValueError(
            f"classifier_feedforward output dim "
            f"({classifier_feedforward.get_output_dim()}) must equal the "
            f"number of labels ({self.num_classes})")
    # For binary classification, track F1 on the positive ("正类") label.
    index = 0
    if self.num_classes == 2:
        index = self.vocab.get_token_index("正类", "labels")
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "f1": F1Measure(index)
    }
    # weights = torch.Tensor(class_weights)
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    matrix_attention: MatrixAttention,
    projection_feedforward: FeedForward,
    inference_encoder: Seq2SeqEncoder,
    output_feedforward: FeedForward,
    output_logit: FeedForward,
    dropout: float = 0.5,
    initializer: InitializerApplicator = InitializerApplicator(),
    **kwargs,
) -> None:
    """ESIM-style model: encoder, cross attention, projection, inference
    encoder, and output layers, with dimension checks between stages."""
    super().__init__(vocab, **kwargs)
    self._text_field_embedder = text_field_embedder
    self._encoder = encoder
    self._matrix_attention = matrix_attention
    self._projection_feedforward = projection_feedforward
    self._inference_encoder = inference_encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
        # Variational dropout applied to RNN inputs.
        self.rnn_input_dropout = InputVariationalDropout(dropout)
    else:
        self.dropout = None
        self.rnn_input_dropout = None
    self._output_feedforward = output_feedforward
    self._output_logit = output_logit
    self._num_labels = vocab.get_vocab_size(namespace="labels")
    check_dimensions_match(
        text_field_embedder.get_output_dim(),
        encoder.get_input_dim(),
        "text field embedding dim",
        "encoder input dim",
    )
    # The projection consumes 4 x the encoder output — presumably the ESIM
    # enhancement concatenation; confirm in forward().
    check_dimensions_match(
        encoder.get_output_dim() * 4,
        projection_feedforward.get_input_dim(),
        "encoder output dim",
        "projection feedforward input",
    )
    check_dimensions_match(
        projection_feedforward.get_output_dim(),
        inference_encoder.get_input_dim(),
        "proj feedforward output dim",
        "inference lstm input dim",
    )
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    # NOTE(review): purpose of `_debug` counter is not visible here — confirm.
    self._debug = 2
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary = None,
    antecedent_feedforward: FeedForward = None,
    relation_cardinality: int = 2,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    """N-ary relation extractor scoring combinations of entity spans, with a
    relation-type map built from entity-type combinations."""
    super(RelationExtractor, self).__init__(vocab, regularizer)
    self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)
    self._antecedent_scorer = TimeDistributed(
        torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1))
    # A single span embedding is a quarter of the feedforward's input dim —
    # presumably the input is 4 concatenated span embeddings (matching the
    # (1, 4, size) bias tensor below); confirm.
    self._span_embedding_size = antecedent_feedforward.get_input_dim() // 4
    self._bias_vectors = torch.nn.Parameter(
        torch.zeros((1, 4, self._span_embedding_size)))
    self._relation_cardinality = relation_cardinality
    # Positive-class loss weight per relation cardinality — NOTE(review):
    # provenance of the 4-ary weight 3.3 is not visible here; confirm.
    self._pos_weight_dict = {2: 1.0, 3: 1.0, 4: 3.3}
    self._pos_weight = self._pos_weight_dict[relation_cardinality]
    # Map each combination of entity types (of the given cardinality) to a
    # relation-type index.
    self._relation_type_map = {
        tuple(e): i
        for i, e in enumerate(
            combinations(used_entities, self._relation_cardinality))
    }
    self._binary_scores = BinaryThresholdF1()
    self._global_scores = NAryRelationMetrics()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             mention_feedforward: FeedForward,
             feature_size: int,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """NER tagger over spans whose class 0 must be the null ("") label.

    Fix: the null-label check previously used ``assert``, which is stripped
    under ``python -O``; it now raises explicitly so the invariant always
    holds.
    """
    super(NERTagger_Has_None, self).__init__(vocab, regularizer)
    # Number of classes determine the output dimension of the final layer
    self._n_labels = vocab.get_vocab_size('ner_labels')
    # TODO(dwadden) think of a better way to enforce this.
    # Null label is needed to keep track of when calculating the metrics
    null_label = vocab.get_token_index("", "ner_labels")
    if null_label != 0:
        # If not, the dummy class won't correspond to the null label.
        raise ValueError(
            f"The null NER label must have index 0, but got {null_label}.")
    # Per-span feedforward followed by a linear layer over all labels.
    self._ner_scorer = torch.nn.Sequential(
        TimeDistributed(mention_feedforward),
        TimeDistributed(
            torch.nn.Linear(mention_feedforward.get_output_dim(), self._n_labels)))
    self._ner_metrics = NERMetrics(self._n_labels, null_label)
    self._loss = torch.nn.CrossEntropyLoss(reduction="sum")
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             context_layer: Seq2SeqEncoder,
             complex_word_feedforward: FeedForward,
             lexical_dropout: float = 0.2,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Complex-word identification model: encodes the text, extracts target
    word endpoints, and scores each word, trained with binary cross-entropy."""
    super(NeuralMutilingualCWI, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    # NOTE(review): the scorer ends with a plain Linear while the loss below
    # is BCELoss (expects probabilities) — presumably a sigmoid is applied in
    # forward(); confirm.
    self._complex_word_scorer = torch.nn.Sequential(
        complex_word_feedforward,
        torch.nn.Linear(complex_word_feedforward.get_output_dim(), 1))
    # Target words are represented by their boundary encoder states.
    self._target_word_extractor = EndpointSpanExtractor(
        context_layer.get_output_dim(), combination="x,y")
    self._loss = torch.nn.BCELoss()
    # F1 on label index 1 (the positive class).
    self._metric = F1Measure(1)
    if lexical_dropout > 0:
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
    else:
        # Identity function when dropout is disabled.
        self._lexical_dropout = lambda x: x
    initializer(self)
def __init__(
    self,
    encoder_output_dim: int,
    action_embedding_dim: int,
    input_attention: Attention,
    activation: Activation = Activation.by_name("relu")(),
    add_action_bias: bool = True,
    mixture_feedforward: FeedForward = None,
    dropout: float = 0.0,
    num_layers: int = 1,
) -> None:
    """Transition function that adds an optional mixture feedforward on top of
    the shared decoder-step setup handled by the parent class."""
    # The parent class owns all of the shared decoder-step wiring.
    super().__init__(
        encoder_output_dim=encoder_output_dim,
        action_embedding_dim=action_embedding_dim,
        input_attention=input_attention,
        activation=activation,
        add_action_bias=add_action_bias,
        dropout=dropout,
        num_layers=num_layers,
    )
    self._mixture_feedforward = mixture_feedforward
    if mixture_feedforward is None:
        return
    # The mixture network must map the hidden state to a single scalar.
    check_dimensions_match(
        encoder_output_dim,
        mixture_feedforward.get_input_dim(),
        "hidden state embedding dim",
        "mixture feedforward input dim",
    )
    check_dimensions_match(
        mixture_feedforward.get_output_dim(),
        1,
        "mixture feedforward output dim",
        "dimension for scalar value",
    )
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    span_extractor: SpanExtractor,
    encoder: Seq2SeqEncoder,
    feedforward: FeedForward = None,
    pos_tag_embedding: Embedding = None,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
    evalb_directory_path: str = DEFAULT_EVALB_DIR,
) -> None:
    """Span-based constituency parser: embeds tokens (plus optional POS tags),
    encodes them, extracts span representations, and projects to labels.

    Fix: the encoder/span-extractor dimension check compares the encoder's
    *output* dim, but its error message was labelled "encoder input dim",
    producing a confusing configuration error; the label now reads
    "encoder output dim".
    """
    super().__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.span_extractor = span_extractor
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    # The optional feedforward runs per span, hence TimeDistributed.
    self.feedforward_layer = TimeDistributed(
        feedforward) if feedforward else None
    self.pos_tag_embedding = pos_tag_embedding or None
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = span_extractor.get_output_dim()
    self.tag_projection_layer = TimeDistributed(
        Linear(output_dim, self.num_classes))
    # POS-tag embeddings, when present, are concatenated with token embeddings.
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(
        representation_dim,
        encoder.get_input_dim(),
        "representation dim (tokens + optional POS tags)",
        "encoder input dim",
    )
    check_dimensions_match(
        encoder.get_output_dim(),
        span_extractor.get_input_dim(),
        "encoder output dim",
        "span extractor input dim",
    )
    if feedforward is not None:
        check_dimensions_match(
            span_extractor.get_output_dim(),
            feedforward.get_input_dim(),
            "span extractor output dim",
            "feedforward input dim",
        )
    self.tag_accuracy = CategoricalAccuracy()
    # EVALB bracketing scorer is optional (pass None to disable).
    if evalb_directory_path is not None:
        self._evalb_score = EvalbBracketingScorer(evalb_directory_path)
    else:
        self._evalb_score = None
    initializer(self)
def test_get_dimension_is_correct(self):
    """The encoder must report exactly the wrapped feedforward's dimensions."""
    inner = FeedForward(input_dim=10, num_layers=1, hidden_dims=10, activations="linear")
    wrapped = FeedForwardEncoder(inner)
    assert inner.get_input_dim() == wrapped.get_input_dim()
    assert inner.get_output_dim() == wrapped.get_output_dim()
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             context_layer: Seq2SeqEncoder,
             mention_feedforward: FeedForward,
             antecedent_feedforward: FeedForward,
             feature_size: int,
             max_span_width: int,
             spans_per_word: float,
             max_antecedents: int,
             lexical_dropout: float = 0.2,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    End-to-end coreference model setup: embed tokens, contextualize, extract
    span representations (endpoint + self-attentive), prune mentions with a
    learned scorer, then score antecedent pairs.
    """
    super(CoreferenceResolver, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)

    # Mention scorer: feedforward followed by a projection to one scalar,
    # applied across all candidate spans.
    mention_scorer = torch.nn.Sequential(
        TimeDistributed(mention_feedforward),
        TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)),
    )
    self._mention_pruner = SpanPruner(mention_scorer)
    self._antecedent_scorer = TimeDistributed(
        torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1))

    # Endpoint extractor also embeds bucketed span widths.
    self._endpoint_span_extractor = EndpointSpanExtractor(
        context_layer.get_output_dim(),
        combination="x,y",
        num_width_embeddings=max_span_width,
        span_width_embedding_dim=feature_size,
        bucket_widths=False,
    )
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(
        input_dim=text_field_embedder.get_output_dim())

    # 10 possible distance buckets.
    self._num_distance_buckets = 10
    self._distance_embedding = Embedding(self._num_distance_buckets, feature_size)

    self._max_span_width = max_span_width
    self._spans_per_word = spans_per_word
    self._max_antecedents = max_antecedents

    self._mention_recall = MentionRecall()
    self._conll_coref_scores = ConllCorefScores()

    # Identity function when lexical dropout is disabled.
    self._lexical_dropout = (
        torch.nn.Dropout(p=lexical_dropout) if lexical_dropout > 0 else (lambda x: x)
    )
    initializer(self)
def __init__(self,
             encoder_output_dim: int,
             action_embedding_dim: int,
             attention_function: SimilarityFunction,
             num_start_types: int,
             num_entity_types: int,
             mixture_feedforward: FeedForward = None,
             dropout: float = 0.0,
             unlinked_terminal_indices: List[int] = None) -> None:
    """
    Decoder step for the WikiTables semantic parser.

    The decoder hidden size equals the encoder output size because the decoder
    state is initialized from the encoder's final hidden state; the decoder
    input size is arbitrarily set to the same value.
    """
    super(WikiTablesDecoderStep, self).__init__()
    self._mixture_feedforward = mixture_feedforward
    self._entity_type_embedding = Embedding(num_entity_types, action_embedding_dim)
    self._input_attention = Attention(attention_function)

    self._num_start_types = num_start_types
    self._start_type_predictor = Linear(encoder_output_dim, num_start_types)

    output_dim = encoder_output_dim
    input_dim = output_dim
    # Project [decoder hidden state ; previous action embedding] down to the
    # decoder input size.
    self._input_projection_layer = Linear(output_dim + action_embedding_dim, input_dim)

    # Prediction input is [decoder state ; attended encoder output], plus the
    # checklist balance when training with coverage over unlinked terminals.
    prediction_input_dim = output_dim + encoder_output_dim
    if unlinked_terminal_indices is not None:
        prediction_input_dim += len(unlinked_terminal_indices)
    self._output_projection_layer = Linear(prediction_input_dim, action_embedding_dim)
    self._unlinked_terminal_indices = unlinked_terminal_indices

    # TODO(pradeep): Do not hardcode decoder cell type.
    self._decoder_cell = LSTMCell(input_dim, output_dim)

    if mixture_feedforward is not None:
        # Mixture network must map the hidden state to a single scalar.
        check_dimensions_match(output_dim, mixture_feedforward.get_input_dim(),
                               "hidden state embedding dim", "mixture feedforward input dim")
        check_dimensions_match(mixture_feedforward.get_output_dim(), 1,
                               "mixture feedforward output dim", "dimension for scalar value")

    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
def __init__(self,
             vocab: Vocabulary,
             mention_feedforward: FeedForward,
             antecedent_feedforward: FeedForward,
             feature_size: int,
             spans_per_word: float,
             span_emb_dim: int,
             max_antecedents: int,
             coref_prop: int = 0,
             coref_prop_dropout_f: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),  # TODO(dwadden add this).
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    Coreference module with optional coreference propagation: mentions are
    pruned with a learned scorer, antecedents scored pairwise, and span
    embeddings optionally refined for ``coref_prop`` iterations via a gating
    network over concatenated span pairs.
    """
    super(CorefResolver, self).__init__(vocab, regularizer)

    self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)
    # Mention scorer: feedforward then a scalar projection, time-distributed
    # over candidate spans.
    mention_scorer = torch.nn.Sequential(
        TimeDistributed(mention_feedforward),
        TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)),
    )
    self._mention_pruner = Pruner(mention_scorer)
    self._antecedent_scorer = TimeDistributed(
        torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1))

    # 10 possible distance buckets.
    self._num_distance_buckets = 10
    self._distance_embedding = Embedding(self._num_distance_buckets, feature_size)

    self._spans_per_word = spans_per_word
    self._max_antecedents = max_antecedents

    self._mention_recall = MentionRecall()
    self._conll_coref_scores = ConllCorefScores()

    self.coref_prop = coref_prop
    # Gate for coreference propagation: sigmoid over a pair of span embeddings.
    self._f_network = FeedForward(input_dim=2 * span_emb_dim,
                                  num_layers=1,
                                  hidden_dims=span_emb_dim,
                                  activations=torch.nn.Sigmoid(),
                                  dropout=coref_prop_dropout_f)
    self.antecedent_softmax = torch.nn.Softmax(dim=-1)
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             label_namespace: str = "labels",
             constraint_type: str = None,
             feedforward: FeedForward = None,
             include_start_end_transitions: bool = True,
             dropout: float = None,
             verbose_metrics: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    CRF sequence tagger: embed tokens, contextualize with ``encoder``,
    optionally transform with ``feedforward``, project to tag logits, and
    decode with a (possibly constrained) conditional random field.
    """
    super().__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self._verbose_metrics = verbose_metrics
    self.dropout = torch.nn.Dropout(dropout) if dropout else None
    self._feedforward = feedforward

    # Project either the feedforward output or the raw encoder output to tags.
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(
        Linear(output_dim, self.num_tags))

    # Constrain CRF transitions to valid tag sequences when a label encoding
    # (e.g. BIO) is specified.
    constraints = None
    if constraint_type is not None:
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(constraint_type, labels)
    self.crf = ConditionalRandomField(
        self.num_tags, constraints,
        include_start_end_transitions=include_start_end_transitions)

    self.span_metric = SpanBasedF1Measure(vocab,
                                          tag_namespace=label_namespace,
                                          label_encoding=constraint_type or "BIO")

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    if feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(),
                               "encoder output dim", "feedforward input dim")
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             projection_feedforward: FeedForward,
             inference_encoder: Seq2SeqEncoder,
             output_feedforward: FeedForward,
             output_logit: FeedForward,
             parser_model_path: str,
             parser_cuda_device: int,
             freeze_parser: bool,
             dropout: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    ESIM-style NLI model augmented with a pretrained dependency parser loaded
    from an archive. Parser weights are frozen unless ``freeze_parser`` is
    False, in which case only the parser's encoder stays trainable.
    """
    super().__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._encoder = encoder
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._projection_feedforward = projection_feedforward
    self._inference_encoder = inference_encoder

    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
        self.rnn_input_dropout = InputVariationalDropout(dropout)
    else:
        self.dropout = None
        self.rnn_input_dropout = None

    self._output_feedforward = output_feedforward
    self._output_logit = output_logit
    self._num_labels = vocab.get_vocab_size(namespace="labels")

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    # Enhanced representation concatenates [a; b; a-b; a*b], hence the 4x.
    check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(),
                           "encoder output dim", "projection feedforward input")
    check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(),
                           "proj feedforward output dim", "inference lstm input dim")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    # Load the pretrained parser and freeze it (head sentinel plus every
    # submodule parameter); selectively re-enable its encoder if requested.
    self._parser = load_archive(parser_model_path, cuda_device=parser_cuda_device).model
    self._parser._head_sentinel.requires_grad = False
    for submodule in self._parser.children():
        for weight in submodule.parameters():
            weight.requires_grad = False
    if not freeze_parser:
        for weight in self._parser.encoder.parameters():
            weight.requires_grad = True

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             input_embedder: TextFieldEmbedder,
             nli_projection_layer: FeedForward,
             training_tasks: Any,
             validation_tasks: Any,
             langs_print_train: List[str] = ["en", "fr", "de", "ur", "sw"],
             dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             feed_lang_ids: bool = True,
             avg: bool = False) -> None:
    """
    Cross-lingual NLI head on top of an XLM-style embedder: embed, dropout,
    and project to the label space, tracking per-language accuracy.

    ``training_tasks`` / ``validation_tasks`` may be dicts (task name -> cfg)
    or plain lists of task names. ``langs_print_train`` selects which training
    languages are shown in metrics; a ``"*"`` entry expands to all training
    tasks' language suffixes.

    NOTE: the default for ``langs_print_train`` is a shared mutable list; it
    is never mutated here, but callers should not mutate it either.
    """
    super(SimpleProjectionXlm, self).__init__(vocab, regularizer)
    self._avg = avg

    # Accept either a dict of tasks (keep the keys) or an explicit list.
    if type(training_tasks) == dict:
        self._training_tasks = list(training_tasks.keys())
    else:
        self._training_tasks = training_tasks
    if type(validation_tasks) == dict:
        self._validation_tasks = list(validation_tasks.keys())
    else:
        self._validation_tasks = validation_tasks

    self._input_embedder = input_embedder
    self._label_namespace = "labels"
    self._num_labels = vocab.get_vocab_size(
        namespace=self._label_namespace)
    self._nli_projection_layer = nli_projection_layer
    print(
        vocab.get_token_to_index_vocabulary(
            namespace=self._label_namespace))
    # Projection must emit one logit per label.
    assert nli_projection_layer.get_output_dim() == self._num_labels

    self._dropout = torch.nn.Dropout(p=dropout)
    self._loss = torch.nn.CrossEntropyLoss()

    initializer(self._nli_projection_layer)

    # Per-language validation accuracy; hidden from tqdm but still computed.
    self._nli_per_lang_acc: Dict[str, CategoricalAccuracy] = dict()
    for taskname in self._validation_tasks:
        self._nli_per_lang_acc[taskname] = CategoricalAccuracy()
    self._nli_avg_acc = Average()

    # Attribute name keeps the historical "pring" typo for compatibility with
    # the rest of this class.
    self._langs_pring_train = langs_print_train or "en"
    if '*' in self._langs_pring_train:
        # BUG FIX: the original used t.split("") which always raises
        # ValueError (empty separator). Task names are assumed to be of the
        # form "<task>_<lang>", so take the suffix after the last underscore
        # — TODO confirm against the task-naming convention used by callers.
        self._langs_pring_train = [t.split("_")[-1] for t in training_tasks]

    self._feed_lang_ids = feed_lang_ids
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             label_namespace: str = "labels",
             constraint_type: str = None,
             feedforward: FeedForward = None,
             include_start_end_transitions: bool = True,
             dropout: float = None,
             verbose_metrics: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    Sequence tagger with a CRF output layer.

    Pipeline: token embedding -> Seq2Seq encoder -> optional feedforward ->
    per-token tag logits -> CRF decoding, optionally constrained by
    ``constraint_type`` (e.g. "BIO").
    """
    super().__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self._verbose_metrics = verbose_metrics

    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None

    self._feedforward = feedforward
    # Tag projection consumes the feedforward output when one is configured,
    # otherwise the encoder output directly.
    projection_input_dim = (feedforward.get_output_dim()
                            if feedforward is not None
                            else self.encoder.get_output_dim())
    self.tag_projection_layer = TimeDistributed(Linear(projection_input_dim, self.num_tags))

    if constraint_type is not None:
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(constraint_type, labels)
    else:
        constraints = None
    self.crf = ConditionalRandomField(
        self.num_tags, constraints,
        include_start_end_transitions=include_start_end_transitions
    )

    self.span_metric = SpanBasedF1Measure(vocab,
                                          tag_namespace=label_namespace,
                                          label_encoding=constraint_type or "BIO")

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    if feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(),
                               "encoder output dim", "feedforward input dim")
    initializer(self)
def __init__(self,
             encoder_output_dim: int,
             action_embedding_dim: int,
             input_attention: Attention,
             num_start_types: int,
             num_entity_types: int,
             mixture_feedforward: FeedForward = None,
             dropout: float = 0.0,
             unlinked_terminal_indices: List[int] = None) -> None:
    """
    Decoder step for the WikiTables parser, optionally trained with coverage.

    The decoder hidden size equals the encoder output size because the decoder
    state is initialized from the encoder's final state; the decoder input
    size is arbitrarily chosen to match.
    """
    super(WikiTablesDecoderStep, self).__init__()
    self._mixture_feedforward = mixture_feedforward
    self._entity_type_embedding = Embedding(num_entity_types, action_embedding_dim)
    self._input_attention = input_attention

    self._num_start_types = num_start_types
    self._start_type_predictor = Linear(encoder_output_dim, num_start_types)

    output_dim = encoder_output_dim
    input_dim = output_dim
    # Project [decoder hidden state ; previous action embedding] down to the
    # decoder input size.
    self._input_projection_layer = Linear(output_dim + action_embedding_dim, input_dim)
    # Prediction input is [decoder state ; attended encoder output], projected
    # to the action embedding space.
    self._output_projection_layer = Linear(output_dim + encoder_output_dim, action_embedding_dim)

    if unlinked_terminal_indices is not None:
        # Coverage training: learned multipliers that (a) add embeddings of
        # yet-to-be-produced unlinked actions to the predicted embedding and
        # (b) boost logits of yet-to-be-produced linked actions.
        self._unlinked_checklist_multiplier = Parameter(torch.FloatTensor([1.0]))
        self._linked_checklist_multiplier = Parameter(torch.FloatTensor([1.0]))
    self._unlinked_terminal_indices = unlinked_terminal_indices

    # TODO(pradeep): Do not hardcode decoder cell type.
    self._decoder_cell = LSTMCell(input_dim, output_dim)

    if mixture_feedforward is not None:
        # Mixture network must map the hidden state to a single scalar.
        check_dimensions_match(output_dim, mixture_feedforward.get_input_dim(),
                               "hidden state embedding dim", "mixture feedforward input dim")
        check_dimensions_match(mixture_feedforward.get_output_dim(), 1,
                               "mixture feedforward output dim", "dimension for scalar value")

    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             span_extractor: SpanExtractor,
             encoder: Seq2SeqEncoder,
             feedforward_layer: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             evalb_directory_path: str = None) -> None:
    """
    Span-based constituency parser setup.

    Tokens (plus optional POS-tag embeddings) are embedded, contextualized by
    ``encoder``, turned into span representations by ``span_extractor``,
    optionally transformed by ``feedforward_layer``, and projected to per-span
    label logits over the "labels" namespace. EVALB scoring is enabled only
    when ``evalb_directory_path`` is given.
    """
    super(SpanConstituencyParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.span_extractor = span_extractor
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    self.feedforward_layer = TimeDistributed(feedforward_layer) if feedforward_layer else None
    self.pos_tag_embedding = pos_tag_embedding or None

    # The projection consumes either the feedforward output (when present)
    # or the raw span representation.
    if feedforward_layer is not None:
        output_dim = feedforward_layer.get_output_dim()
    else:
        output_dim = span_extractor.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_classes))

    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim,
                           encoder.get_input_dim(),
                           "representation dim (tokens + optional POS tags)",
                           "encoder input dim")
    # Fixed mislabeled error message: the first argument here is the encoder
    # *output* dim, not its input dim.
    check_dimensions_match(encoder.get_output_dim(),
                           span_extractor.get_input_dim(),
                           "encoder output dim",
                           "span extractor input dim")
    if feedforward_layer is not None:
        check_dimensions_match(span_extractor.get_output_dim(),
                               feedforward_layer.get_input_dim(),
                               "span extractor output dim",
                               "feedforward input dim")

    self.tag_accuracy = CategoricalAccuracy()

    if evalb_directory_path is not None:
        self._evalb_score = EvalbBracketingScorer(evalb_directory_path)
    else:
        self._evalb_score = None
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             projection_feedforward: FeedForward,
             inference_encoder: Seq2SeqEncoder,
             output_feedforward: FeedForward,
             output_logit: FeedForward,
             dropout: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    ESIM model for natural language inference: embed both sentences, encode,
    align with matrix attention, re-project, run an inference encoder, then
    classify over the "labels" namespace.
    """
    super().__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._encoder = encoder
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._projection_feedforward = projection_feedforward
    self._inference_encoder = inference_encoder

    # Both dropouts are enabled together, or not at all.
    self.dropout = torch.nn.Dropout(dropout) if dropout else None
    self.rnn_input_dropout = InputVariationalDropout(dropout) if dropout else None

    self._output_feedforward = output_feedforward
    self._output_logit = output_logit
    self._num_labels = vocab.get_vocab_size(namespace="labels")

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    # Enhanced representation concatenates [a; b; a-b; a*b], hence the 4x.
    check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(),
                           "encoder output dim", "projection feedforward input")
    check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(),
                           "proj feedforward output dim", "inference lstm input dim")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)