def __init__(self, config: ParsingConfig):
    assert isinstance(config, ParsingConfig)
    super().__init__(config)
    self.config = config
    encoder_dim = config.decoder_config.output_dim
    if self.config.use_pos:
        self.pos_embedding = nn.Embedding(config.num_pos, config.pos_dim, padding_idx=0)
        encoder_dim += config.pos_dim
    self.head_arc_feedforward = FeedForward(encoder_dim, 1, config.arc_dim,
                                            Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(config.arc_dim, config.arc_dim,
                                                 use_input_biases=True)
    self.head_tag_feedforward = FeedForward(encoder_dim, 1, config.tag_dim,
                                            Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(config.tag_dim, config.tag_dim,
                                                  config.num_labels)
    self.dropout = InputVariationalDropout(config.dropout)
    self.use_mst_decoding_for_validation = config.use_mst_decoding_for_validation

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             dropout: float = 0.5,
             input_dropout: float = 0.5,
             head_tag_temperature: Optional[float] = None,
             head_temperature: Optional[float] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(Supertagger, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    encoder_dim = encoder.get_output_dim()
    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim,
                    Activation.by_name("elu")(), dropout=dropout)
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim,
                    Activation.by_name("elu")(), dropout=dropout)
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = BilinearWithBias(tag_representation_dim,
                                         tag_representation_dim, num_labels)
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))
    representation_dim = text_field_embedder.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    self._input_dropout = Dropout(input_dropout)
    self._attachment_scores = CategoricalAccuracy()
    self._tagging_accuracy = CategoricalAccuracy()
    self.head_tag_temperature = head_tag_temperature
    self.head_temperature = head_temperature
    initializer(self)

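# Hedged aside on the two temperature arguments above: a temperature is
# conventionally applied by dividing logits before the softmax, so values > 1
# flatten the distribution and values < 1 sharpen it. How Supertagger uses
# head_tag_temperature / head_temperature in its forward pass is not shown
# here, so this minimal sketch only illustrates the standard pattern.
import torch

logits = torch.randn(2, 5)  # (batch, num_tags), illustrative shapes
temperature = 2.0
probs = torch.softmax(logits / temperature, dim=-1)
assert torch.allclose(probs.sum(dim=-1), torch.ones(2))
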
def __init__(self,
             vocab: Vocabulary,
             encoder_dim: int,
             label_dim: int,
             edge_dim: int,
             dropout: float,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None) -> None:
    """
    Parameters
    ----------
    vocab : ``Vocabulary``, required
        A Vocabulary, required in order to compute sizes for input/output projections.
    encoder_dim : ``int``, required.
        The output dimension of the encoder.
    label_dim : ``int``, required.
        The dimension of the MLPs used for dependency tag prediction.
    edge_dim : ``int``, required.
        The dimension of the MLPs used for head arc prediction.
    tag_feedforward : ``FeedForward``, optional, (default = None).
        The feedforward network used to produce tag representations.
        By default, a 1 layer feedforward network with an elu activation is used.
    arc_feedforward : ``FeedForward``, optional, (default = None).
        The feedforward network used to produce arc representations.
        By default, a 1 layer feedforward network with an elu activation is used.
    dropout : ``float``, optional, (default = 0.0)
        The variational dropout applied to the output of the encoder and MLP layers.
    """
    super(DMEdges, self).__init__(vocab)
    self._encoder_dim = encoder_dim
    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, edge_dim, Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(edge_dim, edge_dim, use_input_biases=True)
    num_labels = vocab.get_vocab_size("head_tags")  # = edge labels
    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, label_dim, Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(label_dim, label_dim, num_labels)
    self._dropout = InputVariationalDropout(dropout)
    check_dimensions_match(label_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(edge_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BiaffineDependencyParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    encoder_dim = encoder.get_output_dim()
    self.head_arc_projection = torch.nn.Linear(encoder_dim, arc_representation_dim)
    self.child_arc_projection = torch.nn.Linear(encoder_dim, arc_representation_dim)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_projection = torch.nn.Linear(encoder_dim, tag_representation_dim)
    self.child_tag_projection = torch.nn.Linear(encoder_dim, tag_representation_dim)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim, num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    initializer(self)

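# A minimal, self-contained sketch (not the library implementation) of the
# biaffine arc-scoring pattern these constructors set up: two projections map
# the encoder output into head/child spaces, and a bilinear form with input
# biases yields a (batch, seq_len, seq_len) matrix of arc scores. All names
# and dimensions below are illustrative assumptions.
import torch


def biaffine_arc_scores(encoded, head_mlp, child_mlp, weight):
    # encoded: (batch, seq_len, encoder_dim)
    head = head_mlp(encoded)    # (batch, seq_len, arc_dim)
    child = child_mlp(encoded)  # (batch, seq_len, arc_dim)
    # use_input_biases=True appends a constant 1 feature to both inputs, which
    # folds the linear and bias terms into a single bilinear product.
    ones = encoded.new_ones(encoded.size(0), encoded.size(1), 1)
    head = torch.cat([head, ones], dim=-1)
    child = torch.cat([child, ones], dim=-1)
    # weight: (arc_dim + 1, arc_dim + 1); result: (batch, seq_len, seq_len)
    return head @ weight @ child.transpose(1, 2)


torch.manual_seed(0)
enc = torch.randn(2, 5, 8)
mlp = torch.nn.Sequential(torch.nn.Linear(8, 4), torch.nn.ELU())
weight = torch.randn(5, 5)
scores = biaffine_arc_scores(enc, mlp, mlp, weight)
assert scores.shape == (2, 5, 5)
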
def __init__(
    self,
    vocab: Vocabulary,
    elmo_embedder: TextFieldEmbedder,
    tokens_embedder: TextFieldEmbedder,
    features_embedder: TextFieldEmbedder,
    phrase_layer: Seq2SeqEncoder,
    projected_layer: Seq2SeqEncoder,
    contextual_passage: Seq2SeqEncoder,
    contextual_question: Seq2SeqEncoder,
    dropout: float = 0.2,
    regularizer: Optional[RegularizerApplicator] = None,
    initializer: InitializerApplicator = InitializerApplicator(),
):
    super(MultiGranularityHierarchicalAttentionFusionNetworks, self).__init__(vocab, regularizer)
    self.elmo_embedder = elmo_embedder
    self.tokens_embedder = tokens_embedder
    self.features_embedder = features_embedder
    self._phrase_layer = phrase_layer
    self._encoding_dim = self._phrase_layer.get_output_dim()
    self.projected_layer = torch.nn.Linear(self._encoding_dim + 1024, self._encoding_dim)
    self.fuse_p = FusionLayer(self._encoding_dim)
    self.fuse_q = FusionLayer(self._encoding_dim)
    self.fuse_s = FusionLayer(self._encoding_dim)
    self.projected_lstm = projected_layer
    self.contextual_layer_p = contextual_passage
    self.contextual_layer_q = contextual_question
    self.linear_self_align = torch.nn.Linear(self._encoding_dim, 1)
    # self._self_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')
    self._self_attention = BilinearMatrixAttention(self._encoding_dim, self._encoding_dim)
    self.bilinear_layer_s = BilinearSeqAtt(self._encoding_dim, self._encoding_dim)
    self.bilinear_layer_e = BilinearSeqAtt(self._encoding_dim, self._encoding_dim)
    self.yesno_predictor = FeedForward(self._encoding_dim, self._encoding_dim, 3)
    self.relu = torch.nn.ReLU()
    self._max_span_length = 30
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    self._span_yesno_accuracy = CategoricalAccuracy()
    self._official_f1 = Average()
    self._variational_dropout = InputVariationalDropout(dropout)
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BiaffineDependencyParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    encoder_dim = encoder.get_output_dim()
    self.head_arc_projection = torch.nn.Linear(encoder_dim, arc_representation_dim)
    self.child_arc_projection = torch.nn.Linear(encoder_dim, arc_representation_dim)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_projection = torch.nn.Linear(encoder_dim, tag_representation_dim)
    self.child_tag_projection = torch.nn.Linear(encoder_dim, tag_representation_dim)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim, num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    self._attachment_scores = AttachmentScores()
    initializer(self)

def __init__(self, inp_dim: int, label_dim: int, method: str) -> None:
    super(SpanPairLabelProjectionLayer, self).__init__()
    assert method in {'mlp', 'biaffine', 'biaffine_paired'}
    self.method = method
    if method == 'mlp':
        self.label_proj = TimeDistributed(nn.Linear(inp_dim, label_dim))
    elif method == 'biaffine':
        self.label_proj = BilinearMatrixAttention(inp_dim, inp_dim,
                                                  use_input_biases=True,
                                                  label_dim=label_dim)
    elif method == 'biaffine_paired':
        # exactly the same as the last one but used with paired inputs
        self.bilinear = nn.modules.Bilinear(inp_dim // 2, inp_dim // 2, label_dim, bias=True)
        self.linear = TimeDistributed(nn.Linear(inp_dim, label_dim, bias=False))
        self.label_proj = lambda x: self.bilinear(*x.split(inp_dim // 2, dim=-1)) + self.linear(x)

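# Hedged sanity sketch for the 'biaffine_paired' branch above: given a vector
# that is the concatenation [head; child], nn.Bilinear on the two halves plus a
# bias-free linear layer over the full concatenation realizes the usual
# biaffine label score head^T U child + W [head; child] + b. The sketch drops
# TimeDistributed and uses a plain nn.Linear; dimensions are assumptions.
import torch
import torch.nn as nn

inp_dim, label_dim = 8, 3
bilinear = nn.Bilinear(inp_dim // 2, inp_dim // 2, label_dim, bias=True)
linear = nn.Linear(inp_dim, label_dim, bias=False)

pair = torch.randn(2, inp_dim)                 # a batch of [head; child] pairs
head, child = pair.split(inp_dim // 2, dim=-1)
scores = bilinear(head, child) + linear(pair)  # (batch, label_dim)
assert scores.shape == (2, label_dim)
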
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder_0: Seq2SeqEncoder,
             encoder_1: Seq2SeqEncoder,
             encoder_2: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             use_layer_normalization: bool = True,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BiaffineDependencyParser, self).__init__(vocab, regularizer)
    a = vocab.get_index_to_token_vocabulary(namespace='tokens')
    # glyph_config['idx2word'] = {k: v for k, v in a.items()}
    # self.glyph = GlyphEmbedding(glyph_config)
    self.text_field_embedder = text_field_embedder
    self.encoder_0 = encoder_0
    self.encoder_1 = encoder_1
    self.encoder_2 = encoder_2
    encoder_dim = self.encoder_2.get_output_dim()
    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim, num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    # self._dropout = Dropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, self.encoder_2.get_output_dim()]))
    self.use_layer_normalization = use_layer_normalization
    if use_layer_normalization:
        self.norm_input = torch.nn.LayerNorm(self.encoder_0.get_input_dim())
        self.norm_hidden = torch.nn.LayerNorm(self.encoder_0.get_output_dim())
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    # check_dimensions_match(representation_dim, encoder.get_input_dim(),
    #                        "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    initializer(self)

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             lemmatize_helper: LemmatizeHelper,
             task_config: TaskConfig,
             morpho_vector_dim: int = 0,
             gram_val_representation_dim: int = -1,
             lemma_representation_dim: int = -1,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(DependencyParser, self).__init__(vocab, regularizer)
    self.TopNCnt = 3
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.lemmatize_helper = lemmatize_helper
    self.task_config = task_config
    encoder_dim = encoder.get_output_dim()
    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim, num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    assert self.task_config.params.get("use_pos_tag", False) == (self._pos_tag_embedding is not None)
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))
    if gram_val_representation_dim <= 0:
        self._gram_val_output = torch.nn.Linear(encoder_dim,
                                                self.vocab.get_vocab_size("grammar_value_tags"))
    else:
        self._gram_val_output = torch.nn.Sequential(
            Dropout(dropout),
            torch.nn.Linear(encoder_dim, gram_val_representation_dim),
            Dropout(dropout),
            torch.nn.Linear(gram_val_representation_dim,
                            self.vocab.get_vocab_size("grammar_value_tags")))
    if lemma_representation_dim <= 0:
        self._lemma_output = torch.nn.Linear(encoder_dim, len(lemmatize_helper))
    else:
        # Feed the grammar-value prediction output into the lemmatizer input -- EXPERIMENTAL
        # actual_input_dim = encoder_dim
        actual_input_dim = encoder_dim + self.vocab.get_vocab_size("grammar_value_tags")
        self._lemma_output = torch.nn.Sequential(
            Dropout(dropout),
            torch.nn.Linear(actual_input_dim, lemma_representation_dim),
            Dropout(dropout),
            torch.nn.Linear(lemma_representation_dim, len(lemmatize_helper)))
    representation_dim = text_field_embedder.get_output_dim() + morpho_vector_dim
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    self._gram_val_prediction_accuracy = CategoricalAccuracy()
    self._lemma_prediction_accuracy = CategoricalAccuracy()
    initializer(self)

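# Hedged sketch of the experimental wiring noted above: the grammar-value
# logits are concatenated onto the encoder states before the lemma head, so
# the lemmatizer can condition on predicted morphology. The real model wraps
# both heads in Sequential blocks with dropout; dimensions here are assumed.
import torch

encoder_dim, num_gram_vals, num_lemma_rules = 8, 6, 10
states = torch.randn(2, 5, encoder_dim)  # (batch, seq_len, encoder_dim)
gram_val_output = torch.nn.Linear(encoder_dim, num_gram_vals)
lemma_output = torch.nn.Linear(encoder_dim + num_gram_vals, num_lemma_rules)

gram_logits = gram_val_output(states)
lemma_logits = lemma_output(torch.cat([states, gram_logits], dim=-1))
assert lemma_logits.shape == (2, 5, num_lemma_rules)
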
def __init__(self, config: SpanProposalConfig, bert_dir: str = ""):
    super().__init__()
    self.config = config
    num_pos_labels = len(config.pos_tags)
    hidden_size = (config.additional_layer_dim if config.additional_layer > 0
                   else config.pos_dim + config.bert_config.hidden_size)
    self.bert = AutoModel.from_pretrained(pretrained_model_name_or_path=bert_dir,
                                          config=config.bert_config)
    if config.pos_dim > 0:
        self.pos_embedding = nn.Embedding(num_pos_labels, config.pos_dim)
        nn.init.xavier_uniform_(self.pos_embedding.weight)
        if (config.additional_layer
                and config.additional_layer_type != "lstm"
                and config.pos_dim + config.bert_config.hidden_size != hidden_size):
            self.fuse_layer = nn.Linear(config.pos_dim + config.bert_config.hidden_size,
                                        hidden_size)
            nn.init.xavier_uniform_(self.fuse_layer.weight)
            self.fuse_layer.bias.data.zero_()
        else:
            self.fuse_layer = None
    else:
        self.pos_embedding = None
    if config.additional_layer > 0:
        if config.additional_layer_type == "transformer":
            new_config = deepcopy(config.bert_config)
            new_config.hidden_size = hidden_size
            new_config.num_hidden_layers = config.additional_layer
            new_config.hidden_dropout_prob = new_config.attention_probs_dropout_prob = config.mrc_dropout
            # new_config.attention_probs_dropout_prob = config.biaf_dropout  # todo add to hparams and tune
            self.additional_encoder = BertEncoder(new_config)
            self.additional_encoder.apply(self._init_bert_weights)
        else:
            assert hidden_size % 2 == 0, "Bi-LSTM needs an even hidden_size"
            self.additional_encoder = StackedBidirectionalLstmSeq2SeqEncoder(
                input_size=config.pos_dim + config.bert_config.hidden_size,
                hidden_size=hidden_size // 2,
                num_layers=config.additional_layer,
                recurrent_dropout_probability=config.mrc_dropout,
                use_highway=True)
    else:
        self.additional_encoder = None
    self._dropout = InputVariationalDropout(config.mrc_dropout)
    self.subtree_start_feedforward = FeedForward(hidden_size, 1, config.arc_representation_dim,
                                                 Activation.by_name("elu")())
    self.subtree_end_feedforward = deepcopy(self.subtree_start_feedforward)
    # todo: equivalent to self-attention?
    self.subtree_start_attention = BilinearMatrixAttention(config.arc_representation_dim,
                                                           config.arc_representation_dim,
                                                           use_input_biases=True)
    self.subtree_end_attention = deepcopy(self.subtree_start_attention)
    # init linear children
    for layer in self.children():
        if isinstance(layer, nn.Linear):
            nn.init.xavier_uniform_(layer.weight)
            if layer.bias is not None:
                layer.bias.data.zero_()

def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    tag_representation_dim: int,
    arc_representation_dim: int,
    model_name: str = None,
    tag_feedforward: FeedForward = None,
    arc_feedforward: FeedForward = None,
    pos_tag_embedding: Embedding = None,
    use_mst_decoding_for_validation: bool = True,
    dropout: float = 0.0,
    input_dropout: float = 0.0,
    word_dropout: float = 0.0,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    if model_name:
        from src.data.token_indexers import PretrainedAutoTokenizer
        self._tokenizer = PretrainedAutoTokenizer.load(model_name)
    encoder_dim = encoder.get_output_dim()
    self.head_arc_feedforward = arc_feedforward or FeedForward(
        encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_feedforward = tag_feedforward or FeedForward(
        encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim, num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._word_dropout = word_dropout
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    initializer(self)

def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    tag_representation_dim: int,
    arc_representation_dim: int,
    tag_feedforward: FeedForward = None,
    arc_feedforward: FeedForward = None,
    pos_tag_embedding: Embedding = None,
    dropout: float = 0.0,
    input_dropout: float = 0.0,
    edge_prediction_threshold: float = 0.5,
    initializer: InitializerApplicator = InitializerApplicator(),
    **kwargs,
) -> None:
    super().__init__(vocab, **kwargs)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.edge_prediction_threshold = edge_prediction_threshold
    if not 0 < edge_prediction_threshold < 1:
        raise ConfigurationError(f"edge_prediction_threshold must be between "
                                 f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")
    encoder_dim = encoder.get_output_dim()
    self.head_arc_feedforward = arc_feedforward or FeedForward(
        encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("labels")
    self.head_tag_feedforward = tag_feedforward or FeedForward(
        encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim,
                                                tag_representation_dim,
                                                label_dim=num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    self._unlabelled_f1 = F1Measure(positive_label=1)
    self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none")
    self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none")
    initializer(self)

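# A minimal sketch (assumed shapes, not library code) of how the graph-parser
# variants above, which train arcs with BCEWithLogitsLoss, turn arc logits into
# edges at inference: each head/child pair is an independent Bernoulli
# decision, so an edge is kept wherever sigmoid(logit) exceeds
# edge_prediction_threshold.
import torch


def predict_edges(arc_logits, threshold=0.5):
    # arc_logits: (batch, seq_len, seq_len); higher means more likely an edge
    probs = torch.sigmoid(arc_logits)
    return probs > threshold  # boolean adjacency matrix per sentence


edges = predict_edges(torch.randn(1, 4, 4), threshold=0.5)
assert edges.shape == (1, 4, 4) and edges.dtype == torch.bool
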
def __init__(self,
             vocab,
             text_field_embedder,
             encoder,
             tag_representation_dim,
             arc_representation_dim,
             tag_feedforward=None,
             arc_feedforward=None,
             pos_tag_embedding=None,
             use_mst_decoding_for_validation=True,
             dropout=0.0,
             input_dropout=0.0,
             initializer=InitializerApplicator(),
             regularizer=None):
    super(BiaffineDependencyParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    encoder_dim = encoder.get_output_dim()
    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim, Activation.by_name(u"elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size(u"head_tags")
    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim, Activation.by_name(u"elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim, num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           u"text field embedding dim", u"encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           u"tag representation dim", u"tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           u"arc representation dim", u"arc feedforward output dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    tags = self.vocab.get_token_to_index_vocabulary(u"pos")
    punctuation_tag_indices = dict((tag, index) for tag, index in list(tags.items())
                                   if tag in POS_TO_IGNORE)
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(u"Found POS tags corresponding to the following punctuation : {0}. "
                u"Ignoring words with these POS tags for evaluation.".format(punctuation_tag_indices))
    self._attachment_scores = AttachmentScores()
    initializer(self)

def __init__(self, bert_dir, config):
    super().__init__()
    self.config = config
    num_dep_labels = len(config.dep_tags)
    num_pos_labels = len(config.pos_tags)
    hidden_size = config.additional_layer_dim
    if config.pos_dim > 0:
        self.pos_embedding = nn.Embedding(num_pos_labels, config.pos_dim)
        nn.init.xavier_uniform_(self.pos_embedding.weight)
        if config.additional_layer_type != "lstm" and config.pos_dim + config.hidden_size != hidden_size:
            self.fuse_layer = nn.Linear(config.pos_dim + config.hidden_size, hidden_size)
            nn.init.xavier_uniform_(self.fuse_layer.weight)
            self.fuse_layer.bias.data.zero_()
        else:
            self.fuse_layer = None
    else:
        self.pos_embedding = None
    if isinstance(config, BertDependencyConfig):
        self.bert = BertModel.from_pretrained(bert_dir, config=self.config)
    elif isinstance(config, RobertaDependencyConfig):
        self.bert = RobertaModel.from_pretrained(bert_dir, config=self.config)
    if config.additional_layer > 0:
        if config.additional_layer_type == "transformer":
            new_config = deepcopy(config)
            new_config.hidden_size = hidden_size
            new_config.num_hidden_layers = config.additional_layer
            new_config.hidden_dropout_prob = config.biaf_dropout
            new_config.attention_probs_dropout_prob = config.biaf_dropout  # todo add to hparams and tune
            self.additional_encoder = BertEncoder(new_config)
            self.additional_encoder.apply(self._init_bert_weights)
        else:
            assert hidden_size % 2 == 0, "Bi-LSTM needs an even hidden_size"
            self.additional_encoder = StackedBidirectionalLstmSeq2SeqEncoder(
                input_size=config.pos_dim + config.hidden_size,
                hidden_size=hidden_size // 2,
                num_layers=config.additional_layer,
                recurrent_dropout_probability=config.biaf_dropout,
                use_highway=True)
    else:
        self.additional_encoder = None
    self.head_arc_feedforward = FeedForward(hidden_size, 1, config.arc_representation_dim,
                                            Activation.by_name("elu")())
    self.child_arc_feedforward = deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(config.arc_representation_dim,
                                                 config.arc_representation_dim,
                                                 use_input_biases=True)
    self.head_tag_feedforward = FeedForward(hidden_size, 1, config.tag_representation_dim,
                                            Activation.by_name("elu")())
    self.child_tag_feedforward = deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = nn.modules.Bilinear(config.tag_representation_dim,
                                            config.tag_representation_dim,
                                            num_dep_labels)
    nn.init.xavier_uniform_(self.tag_bilinear.weight)
    self.tag_bilinear.bias.data.zero_()
    self._dropout = InputVariationalDropout(config.biaf_dropout)
    self._input_dropout = nn.Dropout(config.biaf_dropout)
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, hidden_size]))

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             edge_prediction_threshold: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(EnhancedParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.edge_prediction_threshold = edge_prediction_threshold
    if not 0 < edge_prediction_threshold < 1:
        raise ConfigurationError(f"edge_prediction_threshold must be between "
                                 f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")
    encoder_dim = encoder.get_output_dim()
    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("labels")
    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim,
                                                tag_representation_dim,
                                                label_dim=num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    # add a head sentinel to accommodate the extra root token
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    # the unlabelled_f1 is confirmed the same from both classes
    self._unlabelled_f1 = F1Measure(positive_label=1)
    self._enhanced_attachment_scores = EnhancedAttachmentScores()
    self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction='none')
    self._tag_loss = torch.nn.CrossEntropyLoss(reduction='none')
    initializer(self)

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             arc_representation_dim: int,
             tag_representation_dim: int,
             capsule_dim: int,
             iter_num: int,
             arc_feedforward: FeedForward = None,
             tag_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             # dep_tag_embedding: Embedding = None,
             predicate_embedding: Embedding = None,
             delta_type: str = "hinge_ce",
             subtract_gold: bool = False,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             edge_prediction_threshold: float = 0.5,
             gumbel_t: float = 1,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SRLGraphParserBase, self).__init__(vocab, regularizer)
    self.capsule_dim = capsule_dim
    self.iter_num = iter_num
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.subtract_gold = subtract_gold
    self.edge_prediction_threshold = edge_prediction_threshold
    if not 0 < edge_prediction_threshold < 1:
        raise ConfigurationError(f"edge_prediction_threshold must be between "
                                 f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")
    # print("predicates", self.vocab._index_to_token["predicates"])
    # print("arc_types", self.vocab._index_to_token["arc_types"])
    self.delta_type = delta_type
    num_labels = self.vocab.get_vocab_size("arc_types")
    print("num_labels", num_labels)
    self.gumbel_t = gumbel_t
    node_dim = predicate_embedding.get_output_dim()
    encoder_dim = encoder.get_output_dim()
    self.arg_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")())
    self.pred_arc_feedforward = copy.deepcopy(self.arg_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 # label_dim=capsule_dim,
                                                 use_input_biases=True)
    self.arg_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")())
    self.pred_tag_feedforward = copy.deepcopy(self.arg_tag_feedforward)
    self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim,
                                                tag_representation_dim,
                                                label_dim=num_labels * capsule_dim,
                                                use_input_biases=True)  # , activation=Activation.by_name("tanh")()
    self.predicte_feedforward = FeedForward(encoder_dim, 1, node_dim,
                                            Activation.by_name("elu")())
    self._pos_tag_embedding = pos_tag_embedding or None
    # self._dep_tag_embedding = dep_tag_embedding or None
    self._pred_embedding = predicate_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    # check_dimensions_match(representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim")
    self._labelled_f1 = IterativeLabeledF1Measure(negative_label=0, negative_pred=0,
                                                  selected_metrics=["F", "l_F", "p_F", "u_F"])
    self._tag_loss = torch.nn.NLLLoss(reduction="none")  # , ignore_index=-1
    self._sense_loss = torch.nn.NLLLoss(reduction="none")  # , ignore_index=-1
    initializer(self)

def __init__(self,
             n_relations: int,
             conf: Dict,
             input_batchers: Dict[str, InputBatch],
             use_cuda: bool):
    super(BiaffineParser, self).__init__()
    self.n_relations = n_relations
    self.conf = conf
    self.use_cuda = use_cuda
    self.use_mst_decoding_for_validation = conf['use_mst_decoding_for_validation']
    input_layers = {}
    for i, c in enumerate(conf['input']):
        if c['type'] == 'embeddings':
            if 'pretrained' in c:
                embs = load_embedding_txt(c['pretrained'], c['has_header'])
                logger.info('loaded {0} embedding entries.'.format(len(embs[0])))
            else:
                embs = None
            name = c['name']
            mapping = input_batchers[name].mapping
            layer = Embeddings(name, c['dim'], mapping, fix_emb=c['fixed'],
                               embs=embs, normalize=c.get('normalize', False))
            logger.info('embedding for field {0} created with {1} x {2}.'.format(
                c['field'], layer.n_V, layer.n_d))
            input_layers[name] = layer
        elif c['type'] == 'cnn_encoder' or c['type'] == 'lstm_encoder':
            name = c['name']
            mapping = input_batchers[name].mapping
            embeddings = Embeddings('{0}_ch_emb', c['dim'], mapping, fix_emb=False,
                                    embs=None, normalize=False)
            logger.info('character embedding for field {0} created with {1} x {2}.'.format(
                c['field'], embeddings.n_V, embeddings.n_d))
            if c['type'] == 'lstm_encoder':
                layer = LstmTokenEmbedder(name, c['dim'], embeddings, conf['dropout'], use_cuda)
            elif c['type'] == 'cnn_encoder':
                layer = ConvTokenEmbedder(name, c['dim'], embeddings, c['filters'],
                                          c.get('n_highway', 1), c.get('activation', 'relu'),
                                          use_cuda)
            else:
                raise ValueError('Unknown type: {}'.format(c['type']))
            input_layers[name] = layer
        elif c['type'] == 'elmo':
            name = c['name']
            layer = ContextualizedWordEmbeddings(name, c['path'], use_cuda)
            input_layers[name] = layer
        else:
            raise ValueError('{} unknown input layer'.format(c['type']))
    self.input_layers = torch.nn.ModuleDict(input_layers)
    input_encoders = []
    input_dim = 0
    for i, c in enumerate(conf['input_encoder']):
        input_info = {name: [entry['dim'] for entry in conf['input'] if entry['name'] == name][0]
                      for name in c['input']}
        if c['type'] == 'affine':
            input_encoder = AffineTransformInputEncoder(input_info, c['dim'], use_cuda)
        elif c['type'] == 'sum':
            input_encoder = SummationInputEncoder(input_info, use_cuda)
        elif c['type'] == 'concat':
            input_encoder = ConcatenateInputEncoder(input_info, use_cuda)
        else:
            raise ValueError('{} unknown input encoder'.format(c['type']))
        input_dim += input_encoder.get_output_dim()
        input_encoders.append(input_encoder)
    self.input_encoders = torch.nn.ModuleList(input_encoders)
    c = conf['context_encoder']
    if c['type'] == 'stacked_bidirectional_lstm_dozat':
        self.encoder = PytorchSeq2SeqWrapper(
            InputDropoutedStackedBidirectionalLstm(
                DozatLstmCell,
                num_layers=c['num_layers'],
                input_size=input_dim,
                hidden_size=c['hidden_dim'],
                recurrent_dropout_probability=c['recurrent_dropout_probability'],
                layer_dropout_probability=c['layer_dropout_probability'],
                activation=Activation.by_name("leaky_relu")()),
            stateful=False)
    elif c['type'] == 'stacked_bidirectional_lstm_ma':
        self.encoder = PytorchSeq2SeqWrapper(
            InputDropoutedStackedBidirectionalLstm(
                MaLstmCell,
                num_layers=c['num_layers'],
                input_size=input_dim,
                hidden_size=c['hidden_dim'],
                recurrent_dropout_probability=c['recurrent_dropout_probability'],
                layer_dropout_probability=c['layer_dropout_probability'],
                activation=Activation.by_name("tanh")()),
            stateful=False)
    elif c['type'] == 'stacked_bidirectional_lstm':
        self.encoder = PytorchSeq2SeqWrapper(
            StackedBidirectionalLstm(
                num_layers=c['num_layers'],
                input_size=input_dim,
                hidden_size=c['hidden_dim'],
                recurrent_dropout_probability=c['recurrent_dropout_probability'],
                layer_dropout_probability=c['layer_dropout_probability']),
            stateful=False)
    else:
        self.encoder = DummyContextEncoder()
    encoder_dim = self.encoder.get_output_dim()
    c = conf['biaffine_parser']
    self.arc_representation_dim = arc_representation_dim = c['arc_representation_dim']
    self.tag_representation_dim = tag_representation_dim = c['tag_representation_dim']
    self.head_sentinel_ = torch.nn.Parameter(torch.randn([1, 1, encoder_dim]))
    self.head_arc_feedforward = FeedForward(encoder_dim, 1, arc_representation_dim,
                                            Activation.by_name("elu")())
    self.child_arc_feedforward = FeedForward(encoder_dim, 1, arc_representation_dim,
                                             Activation.by_name("elu")())
    self.head_tag_feedforward = FeedForward(encoder_dim, 1, tag_representation_dim,
                                            Activation.by_name("elu")())
    self.child_tag_feedforward = FeedForward(encoder_dim, 1, tag_representation_dim,
                                             Activation.by_name("elu")())
    arc_attention_version = c.get('arc_attention_version', 'v1')
    if arc_attention_version == 'v2':
        self.arc_attention = BilinearMatrixAttentionV2(arc_representation_dim,
                                                       arc_representation_dim,
                                                       use_input_biases=True)
    else:
        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)
    self.tag_bilinear = BilinearWithBias(tag_representation_dim,
                                         tag_representation_dim, n_relations)
    self.input_dropout_ = torch.nn.Dropout2d(p=conf['dropout'])
    self.dropout_ = InputVariationalDropout(p=conf['dropout'])
    self.input_encoding_timer = TimeRecoder()
    self.context_encoding_timer = TimeRecoder()
    self.classification_timer = TimeRecoder()

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             arc_representation_dim: int,
             tag_representation_dim: int,
             r_lambda: float = 1e-2,
             normalize: bool = False,
             arc_feedforward: FeedForward = None,
             tag_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             dep_tag_embedding: Embedding = None,
             predicate_embedding: Embedding = None,
             delta_type: str = "hinge_ce",
             subtract_gold: float = 0.0,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             gumbel_t: float = 0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SRLGraphParserBase, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.r_lambda = r_lambda
    self.normalize = normalize
    self.as_base = False
    # print("predicates", self.vocab._index_to_token["predicates"])
    # print("tags", self.vocab._index_to_token["tags"])
    self.subtract_gold = subtract_gold
    self.delta_type = delta_type
    num_labels = self.vocab.get_vocab_size("tags")
    print("num_labels", num_labels)
    self.gumbel_t = gumbel_t
    node_dim = predicate_embedding.get_output_dim()
    encoder_dim = encoder.get_output_dim()
    self.arg_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")())
    self.pred_arc_feedforward = copy.deepcopy(self.arg_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    self.arg_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")())
    self.pred_tag_feedforward = copy.deepcopy(self.arg_tag_feedforward)
    self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim,
                                                tag_representation_dim,
                                                label_dim=num_labels,
                                                use_input_biases=True)  # , activation=Activation.by_name("tanh")()
    self.predicte_feedforward = FeedForward(encoder_dim, 1, node_dim,
                                            Activation.by_name("elu")())
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dep_tag_embedding = dep_tag_embedding or None
    self._pred_embedding = predicate_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    # check_dimensions_match(representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim")
    self._labelled_f1 = IterativeLabeledF1Measure(negative_label=0, negative_pred=0,
                                                  selected_metrics=["F", "p_F", "l_P", "l_R"])
    self._tag_loss = torch.nn.NLLLoss(reduction="none")  # , ignore_index=-1
    self._sense_loss = torch.nn.NLLLoss(reduction="none")  # , ignore_index=-1
    initializer(self)

def __init__(
    self,
    vocab: Vocabulary,
    embedding_dim: int,
    tag_representation_dim: int,
    arc_representation_dim: int,
    encoder: Seq2SeqEncoder = None,
    tag_feedforward: FeedForward = None,
    arc_feedforward: FeedForward = None,
    pos_tag_embedding: Embedding = None,
    use_mst_decoding_for_validation: bool = True,
    dropout: float = 0.0,
    input_dropout: float = 0.0,
    pos_namespace: str = "xpos_tags",
    deprel_namespace: str = "deprel_labels",
    initializer: InitializerApplicator = InitializerApplicator(),
    **kwargs,
) -> None:
    super().__init__(vocab, **kwargs)
    self.encoder = encoder
    encoder_dim = (encoder.get_output_dim() if encoder is not None
                   else embedding_dim + pos_tag_embedding.get_output_dim())
    self.head_arc_feedforward = arc_feedforward or FeedForward(
        encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    self.pos_namespace = pos_namespace
    self.deprel_namespace = deprel_namespace
    num_labels = self.vocab.get_vocab_size(deprel_namespace)
    self.head_tag_feedforward = tag_feedforward or FeedForward(
        encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim, num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder_dim]))
    representation_dim = embedding_dim
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    if self.encoder is not None:
        check_dimensions_match(representation_dim, encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    tags = self.vocab.get_token_to_index_vocabulary(self.pos_namespace)
    punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    initializer(self)

def __init__(self,
             vocab: Vocabulary,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             pos_embed_dim: int = None,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(DependencyDecoder, self).__init__(vocab, regularizer)
    self.pos_tag_embedding = None
    if pos_embed_dim is not None:
        self.pos_tag_embedding = Embedding(self.vocab.get_vocab_size("upos"), pos_embed_dim)
    self.dropout = torch.nn.Dropout(p=dropout)
    self.encoder = encoder
    encoder_output_dim = encoder.get_output_dim()
    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_output_dim, 1, arc_representation_dim, Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_output_dim, 1, tag_representation_dim, Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim, num_labels)
    self._dropout = InputVariationalDropout(dropout)
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder_output_dim]))
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    initializer(self)

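# Illustrative sketch (assumed shapes) of the learned head sentinel that many
# constructors above register: a (1, 1, dim) parameter is expanded across the
# batch and concatenated in front of the encoded tokens, so position 0 can act
# as the artificial ROOT that real tokens attach to during arc scoring.
import torch

batch, seq_len, dim = 2, 5, 8
encoded = torch.randn(batch, seq_len, dim)
head_sentinel = torch.nn.Parameter(torch.randn(1, 1, dim))
with_root = torch.cat([head_sentinel.expand(batch, -1, -1), encoded], dim=1)
assert with_root.shape == (batch, seq_len + 1, dim)
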
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             contextualizer: Seq2SeqEncoder,
             labeler: Seq2SeqEncoder,
             projection_size: int,
             bidirectional: bool = False,
             use_hypothesis: bool = True,
             attention: str = "",  # "" - none / cosine / bilinear
             initializer: InitializerApplicator = None,
             classifier_dir="",
             del_perc_lambda=1,
             del_perc=0.3,
             del_metric_threshold=0.1,
             teacher_lambda=0.0,
             coverage_lambda=0.0,
             transition_lamb=0.0,
             gumbel=True,
             neutral_label="") -> None:
    super().__init__(vocab)
    self._text_field_embedder = text_field_embedder
    if contextualizer.is_bidirectional() is not bidirectional:
        raise ConfigurationError(
            "Bidirectionality of contextualizer must match bidirectionality of "
            "language model. "
            f"Contextualizer bidirectional: {contextualizer.is_bidirectional()}, "
            f"language model bidirectional: {bidirectional}")
    self.classifier_dir = classifier_dir
    self.classifier = None
    self.coverage_lambda = coverage_lambda
    self.del_perc_lambda = del_perc_lambda
    self.del_perc = del_perc
    self.teacher_lambda = teacher_lambda
    self.transition_lamb = transition_lamb
    self.gumbel = gumbel
    if classifier_dir != "":
        overrides = '{"model": {"dropout": 0, "output_feedforward": {"dropout": 0}}}'
        overrides = ""
        archive = load_archive(classifier_dir, overrides=overrides)
        self.classifier = archive.model
        # Freeze parameters
        for p in self.classifier.parameters():
            p.requires_grad = False

        # A hack that prevents allennlp from crashing when running extend on all submodules
        def foo(*x, **y):
            return 1

        self.classifier._text_field_embedder.token_embedder_tokens.extend_vocab = foo
        self.classifier.eval()
        # get index of the neutral label
        self.neutral_ind = self.classifier.vocab.get_token_index(neutral_label, 'labels')
    self.criterion = torch.nn.CrossEntropyLoss()
    self._contextualizer = contextualizer
    self._labeler = labeler
    self._bidirectional = bidirectional
    self.use_hypothesis = use_hypothesis
    self.attention = attention
    self.projection_size = projection_size
    # hypothesis aggr
    self.w_prem = torch.nn.Linear(contextualizer.get_output_dim(), projection_size)
    if use_hypothesis:
        self.w_hyp = torch.nn.Linear(contextualizer.get_output_dim(), projection_size)
    self._contextual_dim = contextualizer.get_output_dim()
    # The dimension for making predictions just in the forward
    # (or backward) direction.
    if self._bidirectional:
        self._forward_dim = self._contextual_dim // 2
    else:
        self._forward_dim = self._contextual_dim
    if self.attention:
        if self.attention == "cosine":
            self.attention_mat = CosineMatrixAttention()
        elif self.attention == "bilinear":
            self.attention_mat = BilinearMatrixAttention(self._forward_dim, self._forward_dim)
        else:
            raise ConfigurationError("Undefined attention type")
    self.mask_linear = torch.nn.Linear(self._labeler.get_output_dim(), 2)
    self._accuracy = CategoricalAccuracy()
    self._avg_perc_masked = Average()
    self._avg_transition = Average()
    self._acc_vs_del = AccuracyVSDeletion(del_threshold=del_metric_threshold)
    self._acc_plus_del = AccuracyVSDeletion(del_threshold=0, aggr="sum")
    self._f1_deletions = F1SequenceMeasure(positive_label=1)
    if initializer is not None:
        initializer(self)

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             treebank_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             use_treebank_embedding: bool = False,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BiaffineDependencyParserMonolingual, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    encoder_dim = encoder.get_output_dim()
    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim, num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._treebank_embedding = treebank_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    if treebank_embedding is not None:
        representation_dim += treebank_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    self.use_treebank_embedding = use_treebank_embedding
    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")
    if self.use_treebank_embedding:
        tbids = self.vocab.get_token_to_index_vocabulary("tbids")
        tbid_indices = {tb: index for tb, index in tbids.items()}
        self._tbids = set(tbid_indices.values())
        logger.info(f"Found TBIDs corresponding to the following treebanks : {tbid_indices}. "
                    "Embedding these as additional features.")
    self._attachment_scores = AttachmentScores()
    initializer(self)

def __init__(self,
             options,
             tag_dim,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BiaffineDependencyParser, self).__init__(None, regularizer)
    self.device = options.device
    encoder = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(tag_dim, options.lstm_dims, batch_first=True, bidirectional=True))
    # encoder = PytorchSeq2SeqWrapper(torch.nn.LSTM(tag_dim, options.lstm_dims, batch_first=True))
    self.encoder = encoder  # TODO: IMPORTANT
    num_labels = options.num_labels
    self.ablation = options.ablation
    # print(num_labels)
    tag_representation_dim = options.tag_representation_dim  # 100
    arc_representation_dim = options.arc_representation_dim  # 200
    encoder_dim = self.encoder.get_output_dim()
    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim, num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))
    representation_dim = tag_dim  # text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    self._pos_to_ignore = set()
    # tags = self.vocab.get_token_to_index_vocabulary("pos")
    # punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
    # self._pos_to_ignore = set(punctuation_tag_indices.values())
    # logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
    #             "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    initializer(self)

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             lemma_tag_embedding: Embedding = None,
             upos_tag_embedding: Embedding = None,
             xpos_tag_embedding: Embedding = None,
             feats_tag_embedding: Embedding = None,
             head_information_embedding: Embedding = None,
             head_tag_embedding: Embedding = None,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             edge_prediction_threshold: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             **kwargs) -> None:
    super().__init__(vocab, **kwargs)

    self.text_field_embedder = text_field_embedder
    self.encoder = encoder

    self.edge_prediction_threshold = edge_prediction_threshold
    if not 0 < edge_prediction_threshold < 1:
        raise ConfigurationError(f"edge_prediction_threshold must be between "
                                 f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")

    encoder_dim = encoder.get_output_dim()

    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim,
                    Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)

    num_labels = self.vocab.get_vocab_size("deps")

    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim,
                    Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

    # Unlike the tree parsers above, label scoring also uses a
    # BilinearMatrixAttention (with label_dim), since any head-child
    # pair in the graph may carry a label.
    self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim,
                                                tag_representation_dim,
                                                label_dim=num_labels)

    self._lemma_tag_embedding = lemma_tag_embedding
    self._upos_tag_embedding = upos_tag_embedding
    self._xpos_tag_embedding = xpos_tag_embedding
    self._feats_tag_embedding = feats_tag_embedding
    self._head_tag_embedding = head_tag_embedding
    self._head_information_embedding = head_information_embedding

    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)

    # Add a head sentinel to accommodate the extra root token in EUD graphs.
    self._head_sentinel = torch.nn.Parameter(
        torch.randn([1, 1, encoder.get_output_dim()]))

    representation_dim = text_field_embedder.get_output_dim()
    if lemma_tag_embedding is not None:
        representation_dim += lemma_tag_embedding.get_output_dim()
    if upos_tag_embedding is not None:
        representation_dim += upos_tag_embedding.get_output_dim()
    if xpos_tag_embedding is not None:
        representation_dim += xpos_tag_embedding.get_output_dim()
    if feats_tag_embedding is not None:
        representation_dim += feats_tag_embedding.get_output_dim()
    if head_tag_embedding is not None:
        representation_dim += head_tag_embedding.get_output_dim()
    if head_information_embedding is not None:
        representation_dim += head_information_embedding.get_output_dim()

    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")

    self._enhanced_attachment_scores = EnhancedAttachmentScores()
    # Per-edge losses: sigmoid/BCE for arc existence, softmax/CE for labels.
    self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none")
    self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none")
    initializer(self)
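# --- Illustrative sketch (not from the source) ---
# How `edge_prediction_threshold` and the BCE arc loss are typically used in
# a graph parser of this kind: edges are scored independently, trained with
# per-edge binary cross-entropy, and thresholded at inference. Names and
# shapes below are assumptions chosen for illustration.
import torch

batch, seq_len = 2, 6  # includes the root position
arc_logits = torch.randn(batch, seq_len, seq_len)       # arc_attention output
gold_arcs = torch.randint(0, 2, (batch, seq_len, seq_len)).float()

# reduction="none" keeps a loss per edge, to be masked and summed elsewhere.
arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none")(arc_logits, gold_arcs)

# An edge (head i -> child j) is predicted iff its probability clears the threshold.
edge_prediction_threshold = 0.5
predicted_edges = arc_logits.sigmoid() > edge_prediction_threshold
print(arc_loss.shape, predicted_edges.shape)            # both (2, 6, 6)
# --- End sketch ---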
def __init__(self,
             task: str,
             vocab: Vocabulary,
             input_dim: int,
             tag_representation_dim: int,
             arc_representation_dim: int,
             loss_weight: float = 1.0,
             metric: str = "las",
             use_mst_decoding_for_validation: bool = True,
             **kwargs) -> None:
    super().__init__(vocab, **kwargs)

    self.task = task
    self.input_dim = input_dim
    self.loss_weight = loss_weight

    self.head_arc_feedforward = FeedForward(input_dim, 1, arc_representation_dim,
                                            Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)

    # The label vocabulary is namespaced per task ("<task>_rels").
    num_labels = self.vocab.get_vocab_size(task + "_rels")

    self.head_tag_feedforward = FeedForward(input_dim, 1, tag_representation_dim,
                                            Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim,
                                                  num_labels)

    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, self.input_dim]))

    # This decoder receives pre-encoded representations, so the input
    # dimension plays the role of the text field embedding dimension.
    representation_dim = self.input_dim

    check_dimensions_match(representation_dim, self.input_dim,
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")

    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

    self.metrics = {
        "las": AttachmentScores(),
    }
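# --- Illustrative sketch (not from the source) ---
# How a `tag_bilinear` nn.Bilinear layer scores dependency labels once heads
# are known: gather each word's head representation, then score every label
# for the (head, child) pair. Names and dimensions are assumptions chosen
# for illustration.
import torch

batch, seq_len, tag_dim, num_labels = 2, 6, 4, 10
head_tag = torch.randn(batch, seq_len, tag_dim)    # head_tag_feedforward output
child_tag = torch.randn(batch, seq_len, tag_dim)   # child_tag_feedforward output
heads = torch.randint(0, seq_len, (batch, seq_len))  # head index per word

idx = heads.unsqueeze(-1).expand(-1, -1, tag_dim)
selected_heads = head_tag.gather(1, idx)           # (batch, seq_len, tag_dim)

tag_bilinear = torch.nn.Bilinear(tag_dim, tag_dim, num_labels)
label_logits = tag_bilinear(selected_heads, child_tag)  # (batch, seq_len, num_labels)
print(label_logits.argmax(-1).shape)               # torch.Size([2, 6])
# --- End sketch ---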