def __init__( self,
              encoder_output_dim: int, # 400+200gnn=600
              action_embedding_dim: int, # 200
              input_attention: Attention, # {"type": "dot_product"}
              past_attention: Attention, # {"type": "dot_product"}
              activation: Activation = Activation.by_name('relu')(),
              predict_start_type_separately: bool = True, # False
              num_start_types: int = None,
              add_action_bias: bool = True, # True
              dropout: float = 0.0, # 0.5
              num_layers: int = 1) -> None: # 1
    """
    Transition function that extends the parent with a second attention
    module (``past_attention``) and a tiny linear feedforward.

    All parameters except ``past_attention`` are forwarded unchanged to
    the parent constructor; the inline comments on the parameters record
    the values used in the original experiment configuration.
    """
    super().__init__( encoder_output_dim=encoder_output_dim,
                      action_embedding_dim=action_embedding_dim,
                      input_attention=input_attention,
                      num_start_types=num_start_types,
                      activation=activation,
                      predict_start_type_separately=predict_start_type_separately,
                      add_action_bias=add_action_bias,
                      dropout=dropout,
                      num_layers=num_layers)
    # Extra attention module; presumably applied over past decoding steps
    # in the forward pass -- NOTE(review): confirm in the take-step logic.
    self._past_attention = past_attention
    # 1 -> 1 linear map; the name suggests entity-to-entity scores are
    # rescaled through it -- NOTE(review): confirm against the caller.
    self._ent2ent_ff = FeedForward(1, 1, 1, Activation.by_name('linear')())
def __init__(self, config: ParsingConfig):
    """
    Build the biaffine arc/label scoring layers on top of the decoder output.

    Parameters
    ----------
    config : ``ParsingConfig``
        Carries all layer sizes (``arc_dim``, ``tag_dim``, ``num_labels``,
        optional POS embedding settings), dropout, and the MST-decoding flag.

    Raises
    ------
    TypeError
        If ``config`` is not a ``ParsingConfig``.
    """
    # Raise instead of `assert`: asserts are stripped under `python -O`,
    # which would silently skip this validation.
    if not isinstance(config, ParsingConfig):
        raise TypeError(
            f"config must be a ParsingConfig, got {type(config).__name__}")
    super().__init__(config)
    self.config = config
    encoder_dim = config.decoder_config.output_dim
    if self.config.use_pos:
        # Index 0 is reserved for padding.
        self.pos_embedding = nn.Embedding(config.num_pos, config.pos_dim,
                                          padding_idx=0)
        encoder_dim += config.pos_dim
    # Separate head/child MLPs feed a biaffine arc scorer; the child MLP is
    # a deep copy so the two towers start identical but train independently.
    self.head_arc_feedforward = FeedForward(encoder_dim, 1, config.arc_dim,
                                            Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(config.arc_dim,
                                                 config.arc_dim,
                                                 use_input_biases=True)
    # Same head/child split for label (tag) scoring.
    self.head_tag_feedforward = FeedForward(encoder_dim, 1, config.tag_dim,
                                            Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(config.tag_dim,
                                                  config.tag_dim,
                                                  config.num_labels)
    self.dropout = InputVariationalDropout(config.dropout)
    self.use_mst_decoding_for_validation = config.use_mst_decoding_for_validation
def __init__(self,
             sentence_encoder: Seq2VecEncoder,
             doc_encoder: Seq2VecEncoder,
             query_encoder: Seq2VecEncoder,
             use_encoded: bool = False,
             scorer: Optional[FeedForward] = None,
             sentence_attention: Optional[Attention] = None,
             document_attention: Optional[Attention] = None) -> None:
    """
    Sentence scorer built from Seq2Vec encoders for sentences, the document,
    and the query.

    If ``scorer`` is None, a default 1-layer linear scorer is created whose
    input dimension is query-dim + doc-dim (doubled when ``use_encoded``).
    The query and document encoders must have equal output dimensions.
    """
    super(Seq2VecSentenceScorer, self).__init__()
    self.sentence_encoder = sentence_encoder
    self.doc_encoder = doc_encoder
    self.query_encoder = query_encoder
    self.use_encoded = use_encoded
    self.sentence_attention = sentence_attention
    self.document_attention = document_attention
    # get the dimensions for the scorer and for sanity checking
    q_dim = self.query_encoder.get_output_dim()
    d_dim = self.doc_encoder.get_output_dim()
    input_dim = (q_dim + d_dim)
    # When scoring against encoded representations the feature vector is
    # twice as wide.
    if use_encoded:
        input_dim *= 2
    # set up the scorer (default: single linear layer producing one score)
    if scorer is None:
        scorer = FeedForward(
            input_dim=input_dim,
            num_layers=1,
            hidden_dims=1,
            activations=Activation.by_name('linear')(),
            dropout=0.)
    # Non-linear (tanh) projection of the query, dim-preserving.
    self.query_transformer = FeedForward(
        input_dim=q_dim,
        num_layers=1,
        hidden_dims=q_dim,
        activations=Activation.by_name('tanh')(),
        dropout=0.2)
    self.scorer = scorer
    # assertions to ensure our shapes match our assumptions
    # NOTE(review): these are skipped under `python -O`.
    assert q_dim == d_dim
    assert self.scorer.get_output_dim() == 1
    assert self.scorer.get_input_dim() == input_dim
def __init__(self,
             encoder_output_dim: int,
             action_embedding_dim: int,
             input_attention: Attention,
             past_attention: Attention,
             activation: Activation = Activation.by_name('relu')(),
             predict_start_type_separately: bool = True,
             num_start_types: int = None,
             add_action_bias: bool = True,
             dropout: float = 0.0,
             num_layers: int = 1) -> None:
    """
    Transition function with past-step attention, a sigmoid action gate,
    and an output-type projection layer.
    """
    # NOTE(review): `predict_start_type_separately` and `num_start_types`
    # are accepted but never forwarded to the parent -- possibly kept only
    # for config compatibility; confirm against the base-class signature.
    super().__init__(encoder_output_dim=encoder_output_dim,
                     action_embedding_dim=action_embedding_dim,
                     input_attention=input_attention,
                     activation=activation,
                     add_action_bias=add_action_bias,
                     dropout=dropout,
                     num_layers=num_layers)
    self._past_attention = past_attention
    # 1 -> 1 linear map over entity-to-entity scores -- TODO confirm use.
    self._ent2ent_ff = FeedForward(1, 1, 1, Activation.by_name('linear')())
    # 201 -> 1 sigmoid gate; 201 presumably = action embedding dim (200)
    # plus one extra feature -- NOTE(review): confirm.
    self._action2gate = FeedForward(201, 1, 1, Activation.by_name('sigmoid')())
    # Projects a concatenation of two encoder outputs down to an action
    # embedding.
    self._output_type_projection_layer = Linear(
        encoder_output_dim + encoder_output_dim, action_embedding_dim)
def test_rnn_sentence_extractor(self):
    """Smoke test: the extractor yields one score per (document, sentence)."""
    # Toy dimensions.
    n_docs, n_sents = 3, 5
    embed_dim, rnn_dim = 7, 11

    # Model under test: bidirectional GRU followed by a two-layer scorer.
    wrapped_rnn = PytorchSeq2SeqWrapper(
        GRU(input_size=embed_dim,
            hidden_size=rnn_dim,
            bidirectional=True,
            batch_first=True))
    scorer = FeedForward(input_dim=rnn_dim * 2,
                         num_layers=2,
                         hidden_dims=[10, 1],
                         activations=[Activation.by_name('tanh')(),
                                      Activation.by_name('linear')()])
    extractor = RNNSentenceExtractor(wrapped_rnn, scorer)

    # Dummy sentence encodings with an all-ones mask.
    encodings = torch.randn(n_docs, n_sents, embed_dim)
    mask = torch.ones(n_docs, n_sents)

    # Output must carry exactly one extraction score per sentence.
    scores = extractor(encodings, mask)
    assert scores.size() == (n_docs, n_sents)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             dropout: float = 0.5,
             input_dropout: float = 0.5,
             head_tag_temperature: Optional[float] = None,
             head_temperature: Optional[float] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    Biaffine supertagger: an encoder feeds separate head/child MLPs for
    arc scoring (bilinear attention) and tag scoring (bilinear with bias).

    ``tag_feedforward``/``arc_feedforward`` default to 1-layer ELU MLPs.
    The temperatures are stored for later use (presumably to sharpen or
    flatten the output distributions -- confirm in forward()).
    """
    super(Supertagger, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    encoder_dim = encoder.get_output_dim()
    # Head/child arc MLPs; the child is a deep copy so both towers start
    # identical but train independently.
    self.head_arc_feedforward = \
        arc_feedforward or FeedForward(encoder_dim, 1,
                                       arc_representation_dim,
                                       Activation.by_name("elu")(),
                                       dropout=dropout)
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_feedforward = \
        tag_feedforward or FeedForward(encoder_dim, 1,
                                       tag_representation_dim,
                                       Activation.by_name("elu")(),
                                       dropout=dropout)
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = BilinearWithBias(tag_representation_dim,
                                         tag_representation_dim,
                                         num_labels)
    # Learned sentinel vector, presumably prepended as an artificial ROOT
    # token -- NOTE(review): confirm in forward().
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))
    representation_dim = text_field_embedder.get_output_dim()
    # Fail fast on configuration mismatches.
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    self._input_dropout = Dropout(input_dropout)
    self._attachment_scores = CategoricalAccuracy()
    self._tagging_accuracy = CategoricalAccuracy()
    self.head_tag_temperature = head_tag_temperature
    self.head_temperature = head_temperature
    initializer(self)
def __init__(
        self,
        vocab: Vocabulary,
        input_unit: Seq2VecEncoder,
        text_field_embedder: TextFieldEmbedder,
        # embedding_projection_dim: int = None,
        classifier_feedforward: FeedForward = None,
        max_step: int = 12,
        n_memories: int = 3,
        self_attention: bool = False,
        memory_gate: bool = False,
        dropout: float = 0.15,  # annotation fixed: default is a float, not int
        loss_weights=None,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    MAC-cell classifier: embeds text, projects it into the input unit,
    runs a MAC recurrence, and classifies with a 3-layer feedforward head.
    """
    super().__init__(vocab, regularizer)
    # At least two classes so binary metrics (F1 with positive_label=1)
    # remain well-defined.
    self.num_classes = max(self.vocab.get_vocab_size("labels"), 2)
    self.text_field_embedder = text_field_embedder
    # Bridge from embedding space to the input unit's expected dimension.
    self.proj = nn.Linear(text_field_embedder.get_output_dim(),
                          input_unit.get_input_dim())
    self.input_unit = input_unit
    self.mac = MACCell(
        text_field_embedder.get_output_dim(
        ),  # input_unit.get_output_dim(),
        max_step=max_step,
        n_memories=n_memories,
        self_attention=self_attention,
        memory_gate=memory_gate,
        dropout=dropout,
        save_attns=False,
    )
    hidden_size = 2 * input_unit.get_output_dim()
    n_layers = 3
    # Default head: two ReLU hidden layers then a linear output layer;
    # no dropout on the final layer.
    self.classifier = classifier_feedforward or FeedForward(
        input_dim=hidden_size,
        num_layers=n_layers,
        hidden_dims=(n_layers - 1) * [hidden_size] + [self.num_classes],
        activations=[
            Activation.by_name("relu")(),
            Activation.by_name("relu")(),
            Activation.by_name("linear")()
        ],
        dropout=[dropout, dropout, 0.0])
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "f1": F1Measure(positive_label=1),
        "weighted_f1": WeightedF1Measure(),
        "fbeta": FBetaMeasure(average='micro')
    }
    # `x and y` yields None when loss_weights is falsy, so the loss is
    # unweighted by default.
    weights = loss_weights and torch.FloatTensor(loss_weights)
    self.loss = nn.CrossEntropyLoss(weight=weights)
    initializer(self)
def __init__(
        self,
        input_dim: int,  # input embedding dimension
        num_layers: int = 6,
        num_heads: int = 8,
        feedforward_hidden_dim: int = None,
        feedforward_dropout: float = 0.1,
        attention_dim: int = None,
        value_dim: int = None,
        residual_dropout: float = 0.1,
        attention_dropout: float = 0.1,
        use_positional_embedding: bool = True,
):
    """
    Stacked encoder of ``num_layers`` blocks, each consisting of multi-head
    self-attention + layer norm and a two-layer feedforward + layer norm,
    with a shared residual dropout module.
    """
    super(TransformerEncoder, self).__init__()
    self._attention_layers: List[MultiHeadSelfAttention] = []
    self._attention_norm_layers: List[LayerNorm] = []
    self._feedforward_layers: List[FeedForward] = []
    self._feedforward_norm_layers: List[LayerNorm] = []

    hidden_dim = input_dim
    # Per-head sizes default to an even split of the hidden dimension.
    attention_dim = attention_dim or hidden_dim // num_heads
    value_dim = value_dim or hidden_dim // num_heads
    feedforward_hidden_dim = feedforward_hidden_dim or hidden_dim

    for layer_index in range(num_layers):
        self_attention = MultiHeadSelfAttention(
            num_heads,
            hidden_dim,
            attention_dim * num_heads,
            value_dim * num_heads,
            attention_dropout=attention_dropout)
        self.add_module(f'attention_{layer_index}', self_attention)
        self._attention_layers.append(self_attention)

        attention_norm = LayerNorm(hidden_dim)
        self.add_module(f'attention_norm_{layer_index}', attention_norm)
        self._attention_norm_layers.append(attention_norm)

        # Position-wise transition: hidden -> feedforward_hidden -> hidden.
        transition = FeedForward(
            hidden_dim,
            num_layers=2,
            hidden_dims=[feedforward_hidden_dim, hidden_dim],
            activations=[
                Activation.by_name('relu')(),
                Activation.by_name('linear')()
            ],
            dropout=feedforward_dropout)
        self.add_module(f"feedforward_{layer_index}", transition)
        self._feedforward_layers.append(transition)

        transition_norm = LayerNorm(hidden_dim)
        self.add_module(f"feedforward_norm_{layer_index}", transition_norm)
        self._feedforward_norm_layers.append(transition_norm)

    self._dropout = torch.nn.Dropout(residual_dropout)
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self._use_positional_embedding = use_positional_embedding
def __init__(self,
             vocab: Vocabulary,
             encoder_dim: int,
             label_dim: int,
             edge_dim: int,
             dropout: float,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None) -> None:
    """
    Parameters
    ----------
    vocab : ``Vocabulary``, required
        A Vocabulary, required in order to compute sizes for input/output projections.
    encoder_dim : ``int``, required.
        The output dimension of the encoder.
    label_dim : ``int``, required.
        The dimension of the MLPs used for dependency tag prediction.
    edge_dim : ``int``, required.
        The dimension of the MLPs used for head arc prediction.
    tag_feedforward : ``FeedForward``, optional, (default = None).
        The feedforward network used to produce tag representations.
        By default, a 1 layer feedforward network with an elu activation is used.
    arc_feedforward : ``FeedForward``, optional, (default = None).
        The feedforward network used to produce arc representations.
        By default, a 1 layer feedforward network with an elu activation is used.
    dropout : ``float``, required.
        The variational dropout applied to the output of the encoder and MLP layers.
    """
    super(DMEdges, self).__init__(vocab)
    self._encoder_dim = encoder_dim
    # Head/child arc MLPs; the child is a deep copy so both towers start
    # identical but train independently.
    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, edge_dim,
                    Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(edge_dim, edge_dim,
                                                 use_input_biases=True)
    num_labels = vocab.get_vocab_size("head_tags")  # = edge labels
    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, label_dim,
                    Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(label_dim, label_dim,
                                                  num_labels)
    self._dropout = InputVariationalDropout(dropout)
    # Fail fast if a user-supplied feedforward disagrees with the
    # configured representation dims.
    check_dimensions_match(label_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(edge_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
def init_sig(vocab, d_embedding, embedding_dropout_p, sig_depth, logsig,
             all_code_types, feedforward_num_layers, feedforward_hidden_dims,
             feedforward_activations, feedforward_dropout, leadlag, add_time,
             t_max, t_scale, use_timestamps, split_paths):
    """
    Build a signature-based classification model: token embeddings ->
    optional path augmentations (lead-lag / time) -> signature encoder ->
    feedforward classifier wrapped in a BaseModel.
    """
    # Broadcast the scalar feedforward settings across all hidden layers.
    feedforward_hidden_dims = [feedforward_hidden_dims
                               ] * feedforward_num_layers
    feedforward_activations = [Activation.by_name(feedforward_activations)()
                               ] * feedforward_num_layers
    feedforward_dropout = [feedforward_dropout] * feedforward_num_layers
    # Needed for final layer: append a 1-unit linear output with no dropout.
    feedforward_num_layers += 1
    feedforward_hidden_dims.append(1)
    feedforward_activations.append(Activation.by_name('linear')())
    feedforward_dropout.append(0)
    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size(),
                                embedding_dim=d_embedding)
    # Handle augmentations: each one changes the channel dimension, so the
    # encoder input dim must be recomputed via update_dims.
    augmentations = []
    if add_time:
        augmentations.append('add_time')
    if leadlag:
        augmentations.append('leadlag')
    d_embedding_updated = update_dims(augmentations, d_embedding)
    i_augmentations = init_augmentations(augmentations,
                                         use_timestamps=use_timestamps,
                                         t_max=t_max,
                                         t_scale=t_scale)
    # Embedder maps the input tokens to the appropriate embedding matrix
    word_embeddings: TextFieldEmbedder = BasicTextFieldEmbedder(
        {"tokens": token_embedding})
    # Encoder takes path of (N, L, C) and encodes into state vector
    # encoder = BagOfEmbeddingsEncoder(embedding_dim=d_embedding)
    encoder: Seq2VecEncoder = SignatureEncoder(input_dim=d_embedding_updated,
                                               depth=sig_depth,
                                               logsig=logsig)
    # Classifier input triples when codes are split into three paths.
    classifier_feedforward: FeedForward = FeedForward(
        input_dim=encoder.get_output_dim() * 3
        if (all_code_types and split_paths) else encoder.get_output_dim(),
        num_layers=feedforward_num_layers,
        hidden_dims=feedforward_hidden_dims,
        activations=feedforward_activations,
        dropout=feedforward_dropout)
    model = BaseModel(vocab,
                      word_embeddings,
                      encoder,
                      classifier_feedforward,
                      augmentations=i_augmentations,
                      embedding_dropout_p=embedding_dropout_p)
    return model
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             word_encoder: Seq2SeqEncoder,
             sentence_encoder: Seq2SeqEncoder,
             classifier_feedforward: Union[FeedForward, Maxout],
             attended_text_dropout: float = 0.0,
             bce_pos_weight: int = 10,
             use_positional_encoding: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    Hierarchical attention network for multi-label classification:
    word-level encoder + attention MLP, then sentence-level encoder +
    attention MLP, then a classifier head trained with weighted BCE.
    """
    super(EtdHAN, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.word_encoder = word_encoder
    # 2-layer attention scorer (tanh -> linear); the [True, False] list
    # presumably toggles per-layer bias -- NOTE(review): confirm against
    # the FeedForward signature in use.
    self.word_level_attention = FeedForward(word_encoder.get_output_dim(),
                                            2,
                                            [word_encoder.get_output_dim(), 1],
                                            [Activation.by_name("tanh")(),
                                             Activation.by_name("linear")()],
                                            [True, False])
    self.sentence_encoder = sentence_encoder
    self.sentence_level_attention = FeedForward(sentence_encoder.get_output_dim(),
                                                2,
                                                [sentence_encoder.get_output_dim(), 1],
                                                [Activation.by_name("tanh")(),
                                                 Activation.by_name("linear")()],
                                                [True, False])
    self.classifier_feedforward = classifier_feedforward
    self.use_positional_encoding = use_positional_encoding
    self._dropout = torch.nn.Dropout(attended_text_dropout)
    # Fail fast on an embedder/encoder dimension mismatch.
    if text_field_embedder.get_output_dim() != word_encoder.get_input_dim():
        raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                 "input dimension of the word_encoder. Found {} and {}, "
                                 "respectively.".format(text_field_embedder.get_output_dim(),
                                                        word_encoder.get_input_dim()))
    self.metrics = {
        # "roc_auc_score": RocAucScore()
        "hit_5": HitAtK(5),
        "hit_10": HitAtK(10),
        "precision_5": PrecisionAtK(5),
        "precision_10": PrecisionAtK(10)
        # "hit_100": HitAtK(100),
        # "macro_measure": MacroF1Measure(top_k=5,num_label=self.num_classes)
    }
    # Positive examples are up-weighted uniformly across all classes.
    self.loss = torch.nn.BCEWithLogitsLoss(pos_weight = torch.ones(self.num_classes)*bce_pos_weight)
    initializer(self)
def from_params(cls, params: Params):
    """
    Construct the module from a ``Params`` configuration.

    Recognised keys: ``input_dim``, ``num_layers``, ``hidden_dims``,
    ``activations`` (a single activation name or a list of names), and
    optional ``dropout`` (default 0.0).
    """
    input_dim = params.pop('input_dim')
    num_layers = params.pop('num_layers')
    hidden_dims = params.pop('hidden_dims')
    activations = params.pop('activations')
    # Previously `dropout` was not read here, so configs specifying it
    # crashed on assert_empty; accept it like the sibling from_params does.
    dropout = params.pop('dropout', 0.0)
    # Activations may be one name for all layers or a per-layer list.
    if isinstance(activations, list):
        activations = [Activation.by_name(name)() for name in activations]
    else:
        activations = Activation.by_name(activations)()
    # Any unconsumed key is a configuration error.
    params.assert_empty(cls.__name__)
    return cls(input_dim=input_dim,
               num_layers=num_layers,
               hidden_dims=hidden_dims,
               activations=activations,
               dropout=dropout)
def __init__(
        self,
        input_dim: int,
        num_heads: int = 8,
        attention_dim: Optional[int] = None,
        value_dim: Optional[int] = None,
        feedforward_hidden_dim: int = None,
        residual_dropout: float = 0.1,
        attention_dropout: float = 0.1,
        feedforward_dropout: float = 0.1,
        use_vanilla_wiring: bool = False,
):
    """
    Universal-Transformer decoder block: masked self-attention, encoder
    attention, and a two-layer feedforward transition, each followed by
    layer norm, with shared residual dropout.
    """
    super(UTDecBlock, self).__init__()
    hidden_dim = input_dim
    # Per-head sizes default to an even split of the hidden dimension.
    attention_dim = attention_dim or (hidden_dim // num_heads)
    value_dim = value_dim or (hidden_dim // num_heads)
    feedforward_hidden_dim = feedforward_hidden_dim or hidden_dim
    # Self-attention over the (masked) decoder sequence.
    self._masked_attention = MaskedMultiHeadSelfAttention(
        num_heads, hidden_dim, attention_dim * num_heads,
        value_dim * num_heads, attention_dropout=attention_dropout)
    self._masked_attention_norm = LayerNorm(hidden_dim)
    # Cross-attention over the encoder output.
    self._attention = MultiHeadAttention(
        num_heads, hidden_dim, hidden_dim, attention_dim * num_heads,
        value_dim * num_heads, attention_dropout=attention_dropout)
    self._dropout = torch.nn.Dropout(residual_dropout)
    self._attention_norm = LayerNorm(hidden_dim)
    # use feedforward net as transition function
    self._feedforward = FeedForward(
        hidden_dim,
        num_layers=2,
        hidden_dims=[feedforward_hidden_dim, hidden_dim],
        activations=[
            Activation.by_name('relu')(),
            Activation.by_name('linear')()
        ],
        dropout=feedforward_dropout)
    self._feedforward_norm = LayerNorm(hidden_dim)
    # Presumably selects between residual wiring schemes in forward() --
    # NOTE(review): confirm.
    self._use_vanilla_wiring = use_vanilla_wiring
def prepare_model(args, vocab):
    """
    Assemble the sequence-classification model from command-line args.

    Pipeline: token embedder -> contextual seq2seq encoder -> CNN pooling
    encoder (linear conv activation) -> classification model.
    """
    embedder = prepare_text_field_embedder(args, vocab)

    context_encoder = prepare_context_encoder(
        encoder_type=args.encoder_type,
        input_size=embedder.get_output_dim(),
        encoder_layer_num=args.encoder_layer,
        encoder_size=args.encoder_size,
        encoder_dropout=args.encoder_dropout)

    pooling_encoder = CnnEncoder(
        embedding_dim=context_encoder.get_output_dim(),
        num_filters=args.cnn_hidden,
        ngram_filter_sizes=args.cnn_window,
        conv_layer_activation=Activation.by_name('linear')())

    return Seq2VecClassificationModel(
        vocab=vocab,
        text_field_embedder=embedder,
        seq2seq_encoder=context_encoder,
        seq2vec_encoder=pooling_encoder,
        dropout=args.classifier_dropout,
        classification_type=args.classification_type,
        pos_label=args.positive_label,
    )
def __init__(self,
             embedding_dim: int,
             num_filters: int,
             ngram_filter_sizes: Tuple[int, ...] = (2, 3, 4, 5),  # pylint: disable=bad-whitespace
             conv_layer_activation: Activation = Activation.by_name('relu')(),
             output_dim: Optional[int] = None) -> None:
    """
    CNN encoder: one Conv1d per n-gram width, each with ``num_filters``
    output channels. The concatenated filter outputs are optionally
    projected to ``output_dim``; otherwise the output dimension is
    ``num_filters * len(ngram_filter_sizes)``.
    """
    super(CnnEncoder, self).__init__()
    self._embedding_dim = embedding_dim
    self._num_filters = num_filters
    self._ngram_filter_sizes = ngram_filter_sizes
    self._activation = conv_layer_activation
    self._output_dim = output_dim

    # Build one convolution per n-gram size, registering each so its
    # parameters are tracked by the module.
    self._convolution_layers = []
    for index, ngram_size in enumerate(self._ngram_filter_sizes):
        layer = Conv1d(in_channels=self._embedding_dim,
                       out_channels=self._num_filters,
                       kernel_size=ngram_size)
        self._convolution_layers.append(layer)
        self.add_module('conv_layer_%d' % index, layer)

    maxpool_output_dim = self._num_filters * len(self._ngram_filter_sizes)
    if self._output_dim:
        self.projection_layer = Linear(maxpool_output_dim, self._output_dim)
    else:
        self.projection_layer = None
        self._output_dim = maxpool_output_dim
def __init__(self, config,
             num_labels: int,
             num_pos: int,
             use_pos: bool,
             arc_representation_dim: int,
             arc_feedforward: FeedForward = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.) -> None:
    """
    BERT-based unlabeled dependency parser scoring arcs with a
    distance-based attention over linearly projected representations.
    """
    # NOTE(review): `num_labels`, `num_pos` and `use_pos` are accepted but
    # never used in this constructor -- confirm whether they are consumed
    # elsewhere or are dead parameters.
    super(DistanceDependencyParser, self).__init__(config)
    self.bert = BertModel(config)
    self.apply(self.init_bert_weights)
    encoder_dim = config.hidden_size
    # Single linear layer projecting BERT output to the arc space.
    self.arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim,
                    Activation.by_name("linear")())
    self.arc_attention = DistanceAttention()
    self._dropout = InputVariationalDropout(dropout)
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    self._attachment_scores = UndirectedAttachmentScores()
def __init__(self,
             encoder_output_dim: int,
             action_embedding_dim: int,
             input_attention: Attention,
             output_attention: Attention,
             activation: Activation = Activation.by_name('relu')(),
             predict_start_type_separately: bool = True,
             num_start_types: int = None,
             add_action_bias: bool = True,
             mixture_feedforward: FeedForward = None,
             dropout: float = 0.0,
             num_layers: int = 1) -> None:
    """
    Transition function that additionally attends over previously produced
    outputs (``output_attention``) and overrides the parent's input
    projection to accommodate the extra attended-output features.
    """
    super().__init__(encoder_output_dim=encoder_output_dim,
                     action_embedding_dim=action_embedding_dim,
                     input_attention=input_attention,
                     num_start_types=num_start_types,
                     activation=activation,
                     predict_start_type_separately=predict_start_type_separately,
                     add_action_bias=add_action_bias,
                     dropout=dropout,
                     num_layers=num_layers,
                     mixture_feedforward=mixture_feedforward)
    self._output_attention = output_attention
    # override the parent's projection layers to match the new input shape.
    self._input_projection_layer = Linear(encoder_output_dim + action_embedding_dim,
                                          encoder_output_dim)
    self._attend_output_projection_layer = Linear(encoder_output_dim*2,
                                                  encoder_output_dim)
    # Learned placeholder used before any output exists to attend over;
    # initialized from a normal distribution.
    self._first_attended_output = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    torch.nn.init.normal_(self._first_attended_output)
def __init__(self,
             encoder_output_dim: int,
             action_embedding_dim: int,
             input_attention: Attention,
             activation: Activation = Activation.by_name('relu')(),
             predict_start_type_separately: bool = True,
             num_start_types: int = None,
             add_action_bias: bool = True,
             mixture_feedforward: FeedForward = None,
             dropout: float = 0.0) -> None:
    """
    Checklist-augmented transition function: adds a learned scalar
    multiplier for linked-checklist scores and an optional mixture
    feedforward that must map a hidden state to a single scalar.
    """
    super().__init__(encoder_output_dim=encoder_output_dim,
                     action_embedding_dim=action_embedding_dim,
                     input_attention=input_attention,
                     num_start_types=num_start_types,
                     activation=activation,
                     predict_start_type_separately=predict_start_type_separately,
                     add_action_bias=add_action_bias,
                     dropout=dropout)
    # Learnable scalar weighting of the linked checklist balance.
    self._linked_checklist_multiplier = Parameter(torch.FloatTensor([1.0]))
    self._mixture_feedforward = mixture_feedforward
    if mixture_feedforward is not None:
        # The mixture network must consume a hidden state and emit a scalar.
        check_dimensions_match(encoder_output_dim, mixture_feedforward.get_input_dim(),
                               "hidden state embedding dim", "mixture feedforward input dim")
        check_dimensions_match(mixture_feedforward.get_output_dim(), 1,
                               "mixture feedforward output dim", "dimension for scalar value")
def __init__(self,
             embedding_dim ,
             num_filters ,
             ngram_filter_sizes = (2, 3, 4, 5),   # pylint: disable=bad-whitespace
             conv_layer_activation = None,
             output_dim = None) :
    # Python-2-compatible port (note the u'' string literals), so no type
    # annotations are added here.
    #
    # CNN encoder: one Conv1d per n-gram width, each producing
    # `num_filters` channels. If `output_dim` is given, the concatenated
    # filter outputs are projected to it; otherwise the output dimension
    # is num_filters * len(ngram_filter_sizes).
    super(CnnEncoder, self).__init__()
    self._embedding_dim = embedding_dim
    self._num_filters = num_filters
    self._ngram_filter_sizes = ngram_filter_sizes
    # Default to ReLU when no activation is supplied.
    self._activation = conv_layer_activation or Activation.by_name(u'relu')()
    self._output_dim = output_dim
    self._convolution_layers = [Conv1d(in_channels=self._embedding_dim,
                                       out_channels=self._num_filters,
                                       kernel_size=ngram_size)
                                for ngram_size in self._ngram_filter_sizes]
    # Register each convolution so its parameters are tracked.
    for i, conv_layer in enumerate(self._convolution_layers):
        self.add_module(u'conv_layer_%d' % i, conv_layer)
    maxpool_output_dim = self._num_filters * len(self._ngram_filter_sizes)
    if self._output_dim:
        self.projection_layer = Linear(maxpool_output_dim, self._output_dim)
    else:
        self.projection_layer = None
        self._output_dim = maxpool_output_dim
def from_params(cls, params: Params):
    """
    Construct the module from a ``Params`` configuration.

    Recognised keys: ``input_dim``, ``num_layers``, ``hidden_dims``,
    ``activations`` (one name or a per-layer list of names), and optional
    ``dropout`` (default 0.0). Unconsumed keys raise a configuration error.
    """
    input_dim = params.pop_int('input_dim')
    num_layers = params.pop_int('num_layers')
    hidden_dims = params.pop('hidden_dims')
    activations = params.pop('activations')
    dropout = params.pop('dropout', 0.0)
    # Resolve activation name(s) through the registry.
    if isinstance(activations, list):
        resolved = [Activation.by_name(name)() for name in activations]
    else:
        resolved = Activation.by_name(activations)()
    params.assert_empty(cls.__name__)
    return cls(input_dim=input_dim,
               num_layers=num_layers,
               hidden_dims=hidden_dims,
               activations=resolved,
               dropout=dropout)
def __init__(
        self,
        embedding_dim: int,
        num_filters: int,
        ngram_filter_sizes: Tuple[int, ...] = (2, 3, 4, 5),  # pylint: disable=bad-whitespace
        conv_layer_activation: Activation = None,
        output_dim: Optional[int] = None) -> None:
    """
    CNN encoder variant that pairs each convolution with an explicit
    max-pool module (``MaxPool1dAll``), presumably so pooling indices can
    be inspected for explanations -- NOTE(review): confirm in forward().
    """
    super(ExplainableCnnEncoder, self).__init__()
    self._embedding_dim = embedding_dim
    self._num_filters = num_filters
    self._ngram_filter_sizes = ngram_filter_sizes
    # Default to ReLU when no activation is supplied.
    self._activation = conv_layer_activation or Activation.by_name(
        'relu')()
    self._output_dim = output_dim
    # One (conv, maxpool) pair per n-gram size.
    self._convolution_layers = [(Conv1d(in_channels=self._embedding_dim,
                                        out_channels=self._num_filters,
                                        kernel_size=ngram_size),
                                 MaxPool1dAll(kernel_size=None))
                                for ngram_size in self._ngram_filter_sizes]
    # Register both members of each pair so parameters/buffers are tracked.
    for i, (conv_layer, maxpool_layer) in enumerate(self._convolution_layers):
        self.add_module('conv_layer_%d' % i, conv_layer)
        self.add_module('maxpool_layer_%d' % i, maxpool_layer)
    maxpool_output_dim = self._num_filters * len(self._ngram_filter_sizes)
    if self._output_dim:
        self.projection_layer = Linear(maxpool_output_dim, self._output_dim)
    else:
        self.projection_layer = None
        self._output_dim = maxpool_output_dim
def __init__(self,
             vector_dim: int,
             matrix_dim: int,
             attention_dim: int,
             values_dim: int,
             num_heads: int = 1,
             activation: Activation = None,
             attention_dropout_prob: float = 0.0,
             normalize=True) -> None:
    """
    Multi-head attention of a query vector over a matrix of keys/values.

    Keys and values are produced by one combined projection of the matrix;
    the query vector gets its own projection. Scores are scaled by
    sqrt(attention_dim / num_heads). ``activation`` defaults to linear.

    Raises
    ------
    ValueError
        If ``attention_dim`` is not divisible by ``num_heads``.
    """
    super().__init__(normalize)
    self._num_heads = num_heads
    self._attention_dim = attention_dim
    self._values_dim = values_dim
    self._output_dim = matrix_dim
    if attention_dim % num_heads != 0:
        raise ValueError(
            f"Key size ({attention_dim}) must be divisible by the number of "
            f"attention heads ({num_heads}).")
    # Single projection producing keys and values in one pass.
    self._combined_projection = nn.Linear(matrix_dim,
                                          attention_dim + values_dim)
    self._query_projection = nn.Linear(vector_dim, attention_dim)
    # Per-head scaling factor for the dot products.
    self._scale = (attention_dim // num_heads)**0.5
    self._attention_dropout = nn.Dropout(attention_dropout_prob)
    self._output_projection = nn.Linear(values_dim, self._output_dim)
    self._activation = activation or Activation.by_name('linear')()
    # (Removed a redundant second `self._num_heads = num_heads` assignment
    # and a stale commented-out output-projection line.)
    self.reset_parameters()
def __init__(self,
             vocab: Vocabulary,
             sentence_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             attention: Attention,
             decoder_beam_search: BeamSearch,
             max_decoding_steps: int,
             dropout: float = 0.0) -> None:
    """
    NLVR semantic parser trained with maximum marginal likelihood and
    decoded with beam search over a tanh transition function.
    """
    super(NlvrDirectSemanticParser, self).__init__(vocab=vocab,
                                                   sentence_embedder=sentence_embedder,
                                                   action_embedding_dim=action_embedding_dim,
                                                   encoder=encoder,
                                                   dropout=dropout)
    self._decoder_trainer = MaximumMarginalLikelihood()
    # tanh activation and no action bias, matching the original config.
    self._decoder_step = BasicTransitionFunction(
        encoder_output_dim=self._encoder.get_output_dim(),
        action_embedding_dim=action_embedding_dim,
        input_attention=attention,
        activation=Activation.by_name('tanh')(),
        add_action_bias=False,
        dropout=dropout)
    self._decoder_beam_search = decoder_beam_search
    self._max_decoding_steps = max_decoding_steps
    # Sentinel index used to pad action sequences.
    self._action_padding_index = -1
def __init__(self, tensor_1_dim, tensor_2_dim, activation=None):
    """
    Bilinear similarity: a learned (dim1 x dim2) weight matrix plus a
    scalar bias, with an optional activation (default: linear identity).
    """
    super(BilinearSimilarity, self).__init__()
    # Learnable bilinear form and scalar bias.
    weight = torch.Tensor(tensor_1_dim, tensor_2_dim)
    self._weight_matrix = Parameter(weight)
    self._bias = Parameter(torch.Tensor(1))
    # Fall back to the identity activation when none is supplied.
    if not activation:
        activation = Activation.by_name(u'linear')()
    self._activation = activation
    self.reset_parameters()
def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             dropout: float = 0.1,
             ff_dim: int = 100):
    """
    CRF sequence tagger: embedder -> encoder -> 1-layer ReLU feedforward
    -> linear tag projection -> CRF, with micro-F1 and accuracy metrics.
    """
    super().__init__(vocab)
    self.embedder = embedder
    self.encoder = encoder
    # NOTE(review): dimension check via `assert` is skipped under
    # `python -O`; consider raising ConfigurationError instead.
    assert self.embedder.get_output_dim() == self.encoder.get_input_dim()
    self.feedforward = FeedForward(
        encoder.get_output_dim(), 1, hidden_dims=ff_dim,
        activations=Activation.by_name('relu')(), dropout=dropout)
    # Projects feedforward output to per-tag logits.
    self.out = torch.nn.Linear(
        in_features=self.feedforward.get_output_dim(),
        out_features=vocab.get_vocab_size('labels'))
    self.crf = ConditionalRandomField(vocab.get_vocab_size('labels'))
    self.f1 = FBetaMeasure(average='micro')
    self.accuracy = CategoricalAccuracy()
    # For decoding label indices back to strings.
    self.idx_to_label = vocab.get_index_to_token_vocabulary('labels')
def __init__(self,
             matrix_1_dim: int,
             matrix_2_dim: int,
             activation: Activation = None,
             use_input_biases: bool = False,
             label_dim: int = 1) -> None:
    """
    Bilinear matrix attention with an optional per-label weight stack
    (``label_dim`` > 1 adds a leading label axis to the weight tensor)
    and optional per-input bias vectors.
    """
    super(BilinearMatrixAttentionV2, self).__init__()
    if label_dim == 1:
        self._weight_matrix = torch.nn.Parameter(
            torch.Tensor(matrix_1_dim, matrix_2_dim))
    else:
        self._weight_matrix = torch.nn.Parameter(
            torch.Tensor(label_dim, matrix_1_dim, matrix_2_dim))
    if use_input_biases:
        # NOTE(review): unlike other bilinear attentions in this codebase
        # that append a constant-1 feature to the inputs, this version
        # keeps separate bias parameters per label; confirm they are
        # consumed in forward() and initialized in reset_parameters().
        self._weight_bias1 = torch.nn.Parameter(
            torch.Tensor(label_dim, matrix_1_dim))
        self._weight_bias2 = torch.nn.Parameter(
            torch.Tensor(label_dim, matrix_2_dim))
    self.use_input_biases = use_input_biases
    self._bias = torch.nn.Parameter(torch.Tensor(1))
    # Identity activation by default.
    self._activation = activation or Activation.by_name('linear')()
    self.reset_parameters()
def __init__(
        self,
        encoder_output_dim: int,
        action_embedding_dim: int,
        input_attention: Attention,
        activation: Activation = Activation.by_name("relu")(),
        add_action_bias: bool = True,
        mixture_feedforward: FeedForward = None,
        dropout: float = 0.0,
        num_layers: int = 1,
) -> None:
    """
    Transition function extending the base with an optional mixture
    feedforward that must map a hidden state to a single scalar.
    """
    super().__init__(
        encoder_output_dim=encoder_output_dim,
        action_embedding_dim=action_embedding_dim,
        input_attention=input_attention,
        activation=activation,
        add_action_bias=add_action_bias,
        dropout=dropout,
        num_layers=num_layers,
    )
    self._mixture_feedforward = mixture_feedforward
    # Nothing further to validate when no mixture network is configured.
    if mixture_feedforward is None:
        return
    check_dimensions_match(
        encoder_output_dim,
        mixture_feedforward.get_input_dim(),
        "hidden state embedding dim",
        "mixture feedforward input dim",
    )
    check_dimensions_match(
        mixture_feedforward.get_output_dim(),
        1,
        "mixture feedforward output dim",
        "dimension for scalar value",
    )
def __init__(self,
             vocab: Vocabulary,
             sentence_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             attention: Attention,
             decoder_beam_search: BeamSearch,
             max_decoding_steps: int,
             dropout: float = 0.0) -> None:
    """
    NLVR semantic parser trained with maximum marginal likelihood and
    decoded with beam search over a tanh transition function. This variant
    passes the older start-type arguments to the transition function.
    """
    super(NlvrDirectSemanticParser, self).__init__(vocab=vocab,
                                                   sentence_embedder=sentence_embedder,
                                                   action_embedding_dim=action_embedding_dim,
                                                   encoder=encoder,
                                                   dropout=dropout)
    self._decoder_trainer = MaximumMarginalLikelihood()
    # Single start type, no separate start-type prediction, no action bias.
    self._decoder_step = BasicTransitionFunction(encoder_output_dim=self._encoder.get_output_dim(),
                                                 action_embedding_dim=action_embedding_dim,
                                                 input_attention=attention,
                                                 num_start_types=1,
                                                 activation=Activation.by_name('tanh')(),
                                                 predict_start_type_separately=False,
                                                 add_action_bias=False,
                                                 dropout=dropout)
    self._decoder_beam_search = decoder_beam_search
    self._max_decoding_steps = max_decoding_steps
    # Sentinel index used to pad action sequences.
    self._action_padding_index = -1
def __init__(self, encoder_output_dim: int, decoder_input_dim: int, action_embedding_dim: int, input_attention: Attention, sql_attention: Attention = None, sql_output_dim: int = 100, activation: Activation = Activation.by_name('relu')(), predict_start_type_separately: bool = True, num_start_types: int = None, add_action_bias: bool = True, copy_gate: FeedForward = None, dropout: float = 0.0, num_layers: int = 1) -> None: super().__init__( encoder_output_dim=encoder_output_dim, decoder_input_dim=decoder_input_dim, action_embedding_dim=action_embedding_dim, input_attention=input_attention, sql_attention=sql_attention, sql_output_dim=sql_output_dim, num_start_types=num_start_types, activation=activation, predict_start_type_separately=predict_start_type_separately, add_action_bias=add_action_bias, dropout=dropout, num_layers=num_layers) # control the copy gate self._copy_gate = copy_gate
def __init__(
        self,
        embedding_dim: int,
        num_filters: int,
        ngram_filter_sizes: Tuple[int, ...] = (2, 3, 4, 5),  # pylint: disable=bad-whitespace
        conv_layer_activation: Activation = None,
        output_dim: Optional[int] = None) -> None:
    """
    CNN encoder without a projection layer: the output dimension is always
    ``num_filters * len(ngram_filter_sizes)``.
    """
    super(CnnEncoder, self).__init__()
    self._embedding_dim = embedding_dim
    self._num_filters = num_filters
    self._ngram_filter_sizes = ngram_filter_sizes
    # Default to ReLU when no activation is supplied.
    self._activation = conv_layer_activation or Activation.by_name(
        'relu')()
    # NOTE(review): `output_dim` is stored here but unconditionally
    # overwritten below and no projection layer is created, so the
    # parameter is effectively ignored -- confirm whether that is intended.
    self._output_dim = output_dim
    self._convolution_layers = [
        Conv1d(in_channels=self._embedding_dim,
               out_channels=self._num_filters,
               kernel_size=ngram_size)
        for ngram_size in self._ngram_filter_sizes
    ]
    # Register each convolution so its parameters are tracked.
    for i, conv_layer in enumerate(self._convolution_layers):
        self.add_module('conv_layer_%d' % i, conv_layer)
    self._output_dim = self._num_filters * len(self._ngram_filter_sizes)
def __init__(self,
             in_params: int,
             matrix_1_dim: int,
             matrix_2_dim: int,
             activation: Activation = None,
             use_input_biases: bool = False,
             label_dim: int = 1) -> None:
    """
    Bilinear matrix attention with ``in_params`` stacked weight matrices and an
    optional label dimension; defaults to a linear (identity) activation.
    """
    super().__init__()
    self.in_params = in_params
    # A bias feature appended to each input grows both matrix dimensions by one.
    if use_input_biases:
        matrix_1_dim += 1
        matrix_2_dim += 1
    # More than one label requires an extra axis in the weight tensor.
    if label_dim == 1:
        weight_shape = (in_params, matrix_1_dim, matrix_2_dim)
    else:
        weight_shape = (in_params, label_dim, matrix_1_dim, matrix_2_dim)
    self._weight_matrix = Parameter(torch.Tensor(*weight_shape))
    self._bias = Parameter(torch.Tensor(1))
    self._activation = activation or Activation.by_name('linear')()
    self._use_input_biases = use_input_biases
    self.reset_parameters()
def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        final_feedforward: FeedForward,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    """
    Classifier that CNN-encodes two input texts (claim and evidence), passes the
    combined encoding through a fixed 300-dim feed-forward layer, and finishes
    with ``final_feedforward``.
    """
    super().__init__(vocab, regularizer)
    # Model components.
    self._embedder = text_field_embedder
    self._feed_forward = final_feedforward
    # One CNN encoder per input text, both over the shared embedder output.
    token_dim = self._embedder.get_output_dim()
    self._cnn_claim_encoder = CnnEncoder(embedding_dim=token_dim, num_filters=100)
    self._cnn_evidence_encoder = CnnEncoder(embedding_dim=token_dim, num_filters=100)
    self._static_feedforward_dimension = 300
    # Input dim is doubled — presumably claim and evidence encodings are
    # concatenated in forward(); TODO confirm.
    self._static_feedforward = FeedForward(
            input_dim=self._cnn_claim_encoder.get_output_dim() * 2,
            hidden_dims=self._static_feedforward_dimension,
            num_layers=1,
            activations=Activation.by_name('relu')())
    # Metrics and loss for training/evaluation.
    self._accuracy = CategoricalAccuracy()
    self._loss = nn.CrossEntropyLoss()
    # Apply the configured weight initialization.
    initializer(self)
def __init__(
        self,
        embedding_dim: int,
        num_filters: int,
        ngram_filter_sizes: Tuple[int, ...] = (2, 3, 4, 5),
        conv_layer_activation: Activation = None,
        output_dim: Optional[int] = None,
) -> None:
    """
    CNN encoder: one ``Conv1d`` per n-gram size, optionally followed by a linear
    projection down to ``output_dim``.
    """
    super().__init__()
    self._embedding_dim = embedding_dim
    self._num_filters = num_filters
    self._ngram_filter_sizes = ngram_filter_sizes
    # ReLU unless the caller supplies an activation.
    self._activation = conv_layer_activation or Activation.by_name("relu")()
    # Build and register one convolution per n-gram size so parameters are tracked.
    self._convolution_layers = []
    for index, ngram_size in enumerate(self._ngram_filter_sizes):
        layer = Conv1d(
            in_channels=self._embedding_dim,
            out_channels=self._num_filters,
            kernel_size=ngram_size,
        )
        self.add_module("conv_layer_%d" % index, layer)
        self._convolution_layers.append(layer)
    maxpool_output_dim = self._num_filters * len(self._ngram_filter_sizes)
    # Project only when an explicit output dimension was requested.
    if output_dim:
        self.projection_layer = Linear(maxpool_output_dim, output_dim)
    else:
        self.projection_layer = None
    self._output_dim = output_dim if output_dim else maxpool_output_dim
def __init__(self,
             vocab: Vocabulary,
             sentence_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             attention: Attention,
             beam_size: int,
             max_decoding_steps: int,
             max_num_finished_states: int = None,
             dropout: float = 0.0,
             normalize_beam_score_by_length: bool = False,
             checklist_cost_weight: float = 0.6,
             dynamic_cost_weight: Dict[str, Union[int, float]] = None,
             penalize_non_agenda_actions: bool = False,
             initial_mml_model_file: str = None) -> None:
    """
    Coverage-based NLVR parser trained with expected risk minimization; can
    optionally warm-start its weights from a previously trained MML model archive.
    """
    super(NlvrCoverageSemanticParser, self).__init__(vocab=vocab,
                                                     sentence_embedder=sentence_embedder,
                                                     action_embedding_dim=action_embedding_dim,
                                                     encoder=encoder,
                                                     dropout=dropout)
    self._agenda_coverage = Average()
    self._decoder_trainer: DecoderTrainer[Callable[[CoverageState], torch.Tensor]] = \
            ExpectedRiskMinimization(beam_size=beam_size,
                                     normalize_by_length=normalize_beam_score_by_length,
                                     max_decoding_steps=max_decoding_steps,
                                     max_num_finished_states=max_num_finished_states)
    # Instantiating an empty NlvrWorld just to get the set of terminal productions.
    self._terminal_productions = set(NlvrWorld([]).terminal_productions.values())
    # Single start type, tanh activation, no action bias.
    self._decoder_step = CoverageTransitionFunction(
            encoder_output_dim=self._encoder.get_output_dim(),
            action_embedding_dim=action_embedding_dim,
            input_attention=attention,
            num_start_types=1,
            activation=Activation.by_name('tanh')(),
            predict_start_type_separately=False,
            add_action_bias=False,
            dropout=dropout)
    self._checklist_cost_weight = checklist_cost_weight
    # Dynamic cost scheduling is optional; both fields stay None unless configured.
    self._dynamic_cost_wait_epochs = None
    self._dynamic_cost_rate = None
    if dynamic_cost_weight:
        self._dynamic_cost_wait_epochs = dynamic_cost_weight["wait_num_epochs"]
        self._dynamic_cost_rate = dynamic_cost_weight["rate"]
    self._penalize_non_agenda_actions = penalize_non_agenda_actions
    self._last_epoch_in_forward: int = None
    # TODO (pradeep): Checking whether file exists here to avoid raising an error when we've
    # copied a trained ERM model from a different machine and the original MML model that was
    # used to initialize it does not exist on the current machine. This may not be the best
    # solution for the problem.
    if initial_mml_model_file is not None:
        if os.path.isfile(initial_mml_model_file):
            self._initialize_weights_from_archive(load_archive(initial_mml_model_file))
        else:
            # A model file is passed, but it does not exist. This is expected to happen when
            # you're using a trained ERM model to decode. But it may also happen if the path to
            # the file is really just incorrect. So throwing a warning.
            logger.warning("MML model file for initializing weights is passed, but does not exist."
                           " This is fine if you're just decoding.")
def __init__(self,
             vector_dim: int,
             matrix_dim: int,
             activation: Activation = None,
             normalize: bool = True) -> None:
    """
    Bilinear vector/matrix attention: a learned ``vector_dim x matrix_dim`` weight
    matrix plus a scalar bias, with a linear (identity) activation by default.
    """
    super().__init__(normalize)
    # Fall back to the identity activation when none is supplied.
    self._activation = activation or Activation.by_name('linear')()
    self._weight_matrix = Parameter(torch.Tensor(vector_dim, matrix_dim))
    self._bias = Parameter(torch.Tensor(1))
    self.reset_parameters()
def from_params(cls, params: Params) -> 'LinearSimilarity':
    """Build a ``LinearSimilarity`` from configuration, consuming all keys."""
    kwargs = {
            'tensor_1_dim': params.pop_int("tensor_1_dim"),
            'tensor_2_dim': params.pop_int("tensor_2_dim"),
            'combination': params.pop("combination", "x,y"),
            'activation': Activation.by_name(params.pop("activation", "linear"))(),
    }
    # Fail loudly on any unconsumed configuration keys.
    params.assert_empty(cls.__name__)
    return cls(**kwargs)
def __init__(self,
             tensor_1_dim: int,
             tensor_2_dim: int,
             combination: str = 'x,y',
             activation: Activation = None) -> None:
    """
    Linear similarity: a learned weight vector over the combined representation of
    the two input tensors, plus a scalar bias.

    Fix: the previous default ``activation=Activation.by_name('linear')()`` was
    instantiated once at import time and shared across every instance (the
    mutable-default-argument pitfall). The default is now ``None`` with an
    in-body fallback, matching the sibling similarity/attention modules in this
    file; callers passing nothing still get a linear activation.
    """
    super(LinearSimilarity, self).__init__()
    self._combination = combination
    combined_dim = util.get_combined_dim(combination, [tensor_1_dim, tensor_2_dim])
    self._weight_vector = Parameter(torch.Tensor(combined_dim))
    self._bias = Parameter(torch.Tensor(1))
    # Identity activation unless one is supplied.
    self._activation = activation or Activation.by_name('linear')()
    self.reset_parameters()
def from_params(cls, params: Params) -> 'CnnEncoder':
    """Build a ``CnnEncoder`` from configuration, consuming all keys."""
    kwargs = {
            'embedding_dim': params.pop_int('embedding_dim'),
            'output_dim': params.pop_int('output_dim', None),
            'num_filters': params.pop_int('num_filters'),
            'conv_layer_activation':
                    Activation.by_name(params.pop("conv_layer_activation", "relu"))(),
            'ngram_filter_sizes': tuple(params.pop('ngram_filter_sizes', [2, 3, 4, 5])),
    }
    # Fail loudly on any unconsumed configuration keys.
    params.assert_empty(cls.__name__)
    return cls(**kwargs)
def __init__(self,
             matrix_1_dim: int,
             matrix_2_dim: int,
             activation: Activation = None,
             use_input_biases: bool = False) -> None:
    """
    Bilinear matrix attention: a learned ``matrix_1_dim x matrix_2_dim`` weight
    matrix and scalar bias, with a linear (identity) activation by default.
    """
    super().__init__()
    # A bias feature appended to each input grows both dimensions by one.
    if use_input_biases:
        matrix_1_dim += 1
        matrix_2_dim += 1
    self._use_input_biases = use_input_biases
    self._activation = activation or Activation.by_name('linear')()
    self._weight_matrix = Parameter(torch.Tensor(matrix_1_dim, matrix_2_dim))
    self._bias = Parameter(torch.Tensor(1))
    self.reset_parameters()
def __init__(self,
             encoder_output_dim: int,
             action_embedding_dim: int,
             input_attention: Attention,
             activation: Activation = Activation.by_name('relu')(),
             predict_start_type_separately: bool = True,
             num_start_types: int = None,
             add_action_bias: bool = True,
             dropout: float = 0.0,
             num_layers: int = 1) -> None:
    """
    Transition function for a transition-based decoder: projects the concatenation
    of decoder hidden state and previous action embedding into an LSTM, attends
    over the encoder outputs, and projects back to action-embedding space to make
    a prediction.
    """
    super().__init__()
    self._input_attention = input_attention
    self._add_action_bias = add_action_bias
    self._activation = activation
    self._num_layers = num_layers
    self._predict_start_type_separately = predict_start_type_separately
    if predict_start_type_separately:
        # A dedicated linear classifier picks the start type before regular decoding.
        self._start_type_predictor = Linear(encoder_output_dim, num_start_types)
        self._num_start_types = num_start_types
    else:
        self._start_type_predictor = None
        self._num_start_types = None
    # Decoder output dim needs to be the same as the encoder output dim since we
    # initialize the hidden state of the decoder with the final hidden state of
    # the encoder; the decoder input dim is arbitrarily set to the same value.
    hidden_dim = encoder_output_dim
    decoder_input_dim = hidden_dim
    # Our decoder input will be the concatenation of the decoder hidden state and
    # the previous action embedding, projected down to `decoder_input_dim`.
    self._input_projection_layer = Linear(hidden_dim + action_embedding_dim,
                                          decoder_input_dim)
    # Before predicting, attention over the input is concatenated with the decoder
    # state and projected to `action_embedding_dim`.
    self._output_projection_layer = Linear(hidden_dim + encoder_output_dim,
                                           action_embedding_dim)
    if self._num_layers > 1:
        self._decoder_cell = LSTM(decoder_input_dim, hidden_dim, self._num_layers)
    else:
        # A single layer uses ``LSTMCell``: slightly faster since we only ever run
        # the LSTM one step at a time.
        self._decoder_cell = LSTMCell(decoder_input_dim, hidden_dim)
    # Identity function when dropout is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
def __init__(self,
             encoder_output_dim: int,
             action_embedding_dim: int,
             input_attention: Attention,
             activation: Activation = Activation.by_name('relu')(),
             predict_start_type_separately: bool = True,
             num_start_types: int = None,
             add_action_bias: bool = True,
             dropout: float = 0.0) -> None:
    """
    Transition function that adds a learned scalar multiplier on the checklist
    balance; everything else is forwarded to the parent constructor.
    """
    super().__init__(encoder_output_dim=encoder_output_dim,
                     action_embedding_dim=action_embedding_dim,
                     input_attention=input_attention,
                     num_start_types=num_start_types,
                     activation=activation,
                     predict_start_type_separately=predict_start_type_separately,
                     add_action_bias=add_action_bias,
                     dropout=dropout)
    # See the class docstring for a description of what this does; starts at 1.0
    # and is learned during training.
    self._checklist_multiplier = Parameter(torch.FloatTensor([1.0]))
def __init__(self,
             tensor_1_dim: int,
             tensor_2_dim: int,
             combination: str = 'x,y',
             activation: Activation = None,
             prior=None) -> None:
    """
    Variational linear similarity: keeps mean (mu) and rho parameters of the
    variational posterior over the weight vector and bias, from which concrete
    weights are sampled.

    Fixes over the previous version:
    - ``prior is None`` replaces the convoluted ``type(prior) == type(None)`` check.
    - The weight count for the standardized prior was computed by allocating an
      uninitialized ``torch.Tensor(combined_dim)`` only to read its length back;
      that length is simply ``combined_dim``, so the wasted allocation is removed.
    """
    super(LinearSimilarityVB, self).__init__()
    self._combination = combination
    combined_dim = util.get_combined_dim(combination, [tensor_1_dim, tensor_2_dim])
    # Flag to know if we sample from the posterior mean or actually sample weights.
    self.posterior_mean = False
    # If no prior is specified we create a default one ourselves, standardized to
    # the number of weights in this module.
    if prior is None:
        prior = Vil.Prior(0.5, np.log(0.1), np.log(0.5))
        prior = prior.get_standarized_Prior(int(combined_dim))
    self.prior = prior
    # Mean (mu) and rho parameters of the variational posterior.
    self.mu_weight = Parameter(torch.Tensor(combined_dim))
    self.rho_weight = Parameter(torch.Tensor(combined_dim))
    self.rho_bias = Parameter(torch.Tensor(1))
    self.mu_bias = Parameter(torch.Tensor(1))
    # Buffers holding the sampled weights.
    self.weight = torch.Tensor(combined_dim)
    self.bias = torch.Tensor(1)
    self._activation = activation or Activation.by_name('linear')()
    # Initialize the variational variables.
    self.reset_parameters()
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    Biaffine dependency parser: separate head/child feed-forward projections feed
    a bilinear arc scorer and a bilinear label scorer over the encoder output.

    Fix: corrected the typo "correspoding" -> "corresponding" in the startup log
    message.
    """
    super(BiaffineDependencyParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    encoder_dim = encoder.get_output_dim()
    # Arc scorer: head and child projections share architecture (deep copy) and
    # feed a bilinear attention with input biases.
    self.head_arc_feedforward = arc_feedforward or \
            FeedForward(encoder_dim, 1, arc_representation_dim,
                        Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    # Label scorer over head/child tag representations.
    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_feedforward = tag_feedforward or \
            FeedForward(encoder_dim, 1, tag_representation_dim,
                        Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim,
                                                  num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    # Learned sentinel vector — presumably prepended as an artificial root token;
    # confirm in forward().
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    # Punctuation POS tags are excluded from evaluation.
    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items()
                               if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             edge_prediction_threshold: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    Graph-based parser: scores every head/child pair with a bilinear arc scorer
    and a multi-label bilinear tag scorer. ``edge_prediction_threshold`` is
    validated here and presumably applied during decoding — confirm in forward().
    """
    super(GraphParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.edge_prediction_threshold = edge_prediction_threshold
    # Reject thresholds outside the open interval (0, 1).
    if not 0 < edge_prediction_threshold < 1:
        raise ConfigurationError(f"edge_prediction_threshold must be between "
                                 f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")
    encoder_dim = encoder.get_output_dim()
    # Arc scorer: head and child projections share architecture (deep copy) and
    # feed a bilinear attention with input biases.
    self.head_arc_feedforward = arc_feedforward or \
            FeedForward(encoder_dim, 1, arc_representation_dim,
                        Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    # Label scorer: a bilinear attention with one slice per edge label.
    num_labels = self.vocab.get_vocab_size("labels")
    self.head_tag_feedforward = tag_feedforward or \
            FeedForward(encoder_dim, 1, tag_representation_dim,
                        Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim,
                                                tag_representation_dim,
                                                label_dim=num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    # The encoder must accept the embedder output (plus optional POS embedding).
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    # Metrics and per-element losses (reduction handled by the caller).
    self._unlabelled_f1 = F1Measure(positive_label=1)
    self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction='none')
    self._tag_loss = torch.nn.CrossEntropyLoss(reduction='none')
    initializer(self)