def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, num_highway_layers: int, phrase_layer: Seq2SeqEncoder, similarity_function: SimilarityFunction, modeling_layer: Seq2SeqEncoder, span_end_encoder: Seq2SeqEncoder, dropout: float = 0.2, mask_lstms: bool = True, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(), num_highway_layers)) self._phrase_layer = phrase_layer self._matrix_attention = LegacyMatrixAttention(similarity_function) self._modeling_layer = modeling_layer self._span_end_encoder = span_end_encoder encoding_dim = phrase_layer.get_output_dim() modeling_dim = modeling_layer.get_output_dim() span_start_input_dim = encoding_dim * 4 + modeling_dim self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1)) span_end_encoding_dim = span_end_encoder.get_output_dim() span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1)) # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily # obvious from the configuration files, so we check here. check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim, "modeling layer input dim", "4 * encoding dim") check_dimensions_match(text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(), "text field embedder output dim", "phrase layer input dim") check_dimensions_match(span_end_encoder.get_input_dim(), 4 * encoding_dim + 3 * modeling_dim, "span end encoder input dim", "4 * encoding dim + 3 * modeling dim") self._span_start_accuracy = CategoricalAccuracy() self._span_end_accuracy = CategoricalAccuracy() self._span_accuracy = BooleanAccuracy() self._squad_metrics = SquadEmAndF1() if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._mask_lstms = mask_lstms initializer(self)
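# Note (added for illustration, not part of the original source): a worked example of the
# dimension bookkeeping checked in the constructor above, assuming a hypothetical
# configuration in which both the phrase layer and the modeling layer output 200-dim vectors.
#
#   encoding_dim            = 200                        # phrase_layer.get_output_dim()
#   modeling_dim            = 200                        # modeling_layer.get_output_dim()
#   modeling layer input    = 4 * encoding_dim = 800     # first check_dimensions_match
#   span_start input dim    = 4 * 200 + 200   = 1000     # Linear(1000, 1)
#   span_end_encoder input  = 4 * 200 + 3 * 200 = 1400   # last check_dimensions_match
#   span_end input dim      = 4 * 200 + span_end_encoder.get_output_dim()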
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, binary_feature_dim: int, embedding_dropout: float = 0.0, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, label_smoothing: float = None, ignore_span_metric: bool = False) -> None: super(SemanticRoleLabeler, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.num_classes = self.vocab.get_vocab_size("labels") # For the span based evaluation, we don't want to consider labels # for verb, because the verb index is provided to the model. self.span_metric = SpanBasedF1Measure(vocab, tag_namespace="labels", ignore_classes=["V"]) self.encoder = encoder # There are exactly 2 binary features for the verb predicate embedding. self.binary_feature_embedding = Embedding(2, binary_feature_dim) self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(), self.num_classes)) self.embedding_dropout = Dropout(p=embedding_dropout) self._label_smoothing = label_smoothing self.ignore_span_metric = ignore_span_metric check_dimensions_match(text_field_embedder.get_output_dim() + binary_feature_dim, encoder.get_input_dim(), "text embedding dim + verb indicator embedding dim", "encoder input dim") initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             label_namespace: str = "labels",
             constraint_type: str = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(),
                                                       self.num_tags))

    if constraint_type is not None:
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(constraint_type, labels)
    else:
        constraints = None

    self.crf = ConditionalRandomField(self.num_tags, constraints)

    self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace)

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    initializer(self)
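# Example (added for illustration, not part of the original model code): a minimal sketch of the
# constraint_type -> allowed_transitions -> ConditionalRandomField wiring used above, assuming
# the standard AllenNLP conditional_random_field module. The toy label set is purely illustrative.
from allennlp.modules.conditional_random_field import ConditionalRandomField, allowed_transitions

toy_labels = {0: "O", 1: "B-PER", 2: "I-PER"}             # index -> tag, as returned by
                                                          # vocab.get_index_to_token_vocabulary(...)
toy_constraints = allowed_transitions("BIO", toy_labels)  # allowed (from_idx, to_idx) pairs,
                                                          # including the virtual start/end states
toy_crf = ConditionalRandomField(len(toy_labels), toy_constraints)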
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, similarity_function: SimilarityFunction, projection_feedforward: FeedForward, inference_encoder: Seq2SeqEncoder, output_feedforward: FeedForward, output_logit: FeedForward, dropout: float = 0.5, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._encoder = encoder self._matrix_attention = LegacyMatrixAttention(similarity_function) self._projection_feedforward = projection_feedforward self._inference_encoder = inference_encoder if dropout: self.dropout = torch.nn.Dropout(dropout) self.rnn_input_dropout = InputVariationalDropout(dropout) else: self.dropout = None self.rnn_input_dropout = None self._output_feedforward = output_feedforward self._output_logit = output_logit self._num_labels = vocab.get_vocab_size(namespace="labels") check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(), "encoder output dim", "projection feedforward input") check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(), "proj feedforward output dim", "inference lstm input dim") self._accuracy = CategoricalAccuracy() self._loss = torch.nn.CrossEntropyLoss() initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, label_namespace: str = "labels", constraint_type: str = None, feedforward: FeedForward = None, include_start_end_transitions: bool = True, dropout: float = None, verbose_metrics: bool = False, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self._verbose_metrics = verbose_metrics if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self._feedforward = feedforward if feedforward is not None: output_dim = feedforward.get_output_dim() else: output_dim = self.encoder.get_output_dim() self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags)) if constraint_type is not None: labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(constraint_type, labels) else: constraints = None self.crf = ConditionalRandomField( self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions ) self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace, label_encoding=constraint_type or "BIO") check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") if feedforward is not None: check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(), "encoder output dim", "feedforward input dim") initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BiaffineDependencyParser, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.encoder = encoder

    encoder_dim = encoder.get_output_dim()
    self.head_arc_projection = torch.nn.Linear(encoder_dim, arc_representation_dim)
    self.child_arc_projection = torch.nn.Linear(encoder_dim, arc_representation_dim)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)

    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_projection = torch.nn.Linear(encoder_dim, tag_representation_dim)
    self.child_tag_projection = torch.nn.Linear(encoder_dim, tag_representation_dim)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim,
                                                  num_labels)

    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)

    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()

    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")

    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, span_extractor: SpanExtractor, encoder: Seq2SeqEncoder, feedforward_layer: FeedForward = None, pos_tag_embedding: Embedding = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, evalb_directory_path: str = None) -> None: super(SpanConstituencyParser, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.span_extractor = span_extractor self.num_classes = self.vocab.get_vocab_size("labels") self.encoder = encoder self.feedforward_layer = TimeDistributed(feedforward_layer) if feedforward_layer else None self.pos_tag_embedding = pos_tag_embedding or None if feedforward_layer is not None: output_dim = feedforward_layer.get_output_dim() else: output_dim = span_extractor.get_output_dim() self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_classes)) representation_dim = text_field_embedder.get_output_dim() if pos_tag_embedding is not None: representation_dim += pos_tag_embedding.get_output_dim() check_dimensions_match(representation_dim, encoder.get_input_dim(), "representation dim (tokens + optional POS tags)", "encoder input dim") check_dimensions_match(encoder.get_output_dim(), span_extractor.get_input_dim(), "encoder input dim", "span extractor input dim") if feedforward_layer is not None: check_dimensions_match(span_extractor.get_output_dim(), feedforward_layer.get_input_dim(), "span extractor output dim", "feedforward input dim") self.tag_accuracy = CategoricalAccuracy() if evalb_directory_path is not None: self._evalb_score = EvalbBracketingScorer(evalb_directory_path) else: self._evalb_score = None initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SimpleTagger, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(),
                                                       self.num_classes))

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }

    initializer(self)
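# Example (added for illustration, not part of the original model code): a minimal sketch of
# constructing the SimpleTagger whose constructor appears above, assuming the standard AllenNLP
# modules (Vocabulary, Embedding, BasicTextFieldEmbedder, PytorchSeq2SeqWrapper). The vocabulary
# contents and all dimensions are illustrative only.
import torch
from allennlp.data import Vocabulary
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper

toy_vocab = Vocabulary()
for token in ["the", "cat", "sat"]:
    toy_vocab.add_token_to_namespace(token, namespace="tokens")
for tag in ["DET", "NOUN", "VERB"]:
    toy_vocab.add_token_to_namespace(tag, namespace="labels")

embedder = BasicTextFieldEmbedder(
    {"tokens": Embedding(num_embeddings=toy_vocab.get_vocab_size("tokens"), embedding_dim=16)})
# Bidirectional LSTM with 8 hidden units per direction -> 16-dim output; the only hard
# requirement checked in __init__ is embedder output dim == encoder input dim (16 == 16).
encoder = PytorchSeq2SeqWrapper(torch.nn.LSTM(16, 8, batch_first=True, bidirectional=True))

tagger = SimpleTagger(vocab=toy_vocab, text_field_embedder=embedder, encoder=encoder)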
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             lemmatize_helper: LemmatizeHelper,
             task_config: TaskConfig,
             morpho_vector_dim: int = 0,
             gram_val_representation_dim: int = -1,
             lemma_representation_dim: int = -1,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(DependencyParser, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.lemmatize_helper = lemmatize_helper
    self.task_config = task_config

    encoder_dim = encoder.get_output_dim()

    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)

    num_labels = self.vocab.get_vocab_size("head_tags")

    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim,
                                                  num_labels)

    self._pos_tag_embedding = pos_tag_embedding or None
    assert self.task_config.params.get("use_pos_tag", False) == (self._pos_tag_embedding is not None)

    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))

    if gram_val_representation_dim <= 0:
        self._gram_val_output = torch.nn.Linear(encoder_dim, self.vocab.get_vocab_size("grammar_value_tags"))
    else:
        self._gram_val_output = torch.nn.Sequential(
            Dropout(dropout),
            torch.nn.Linear(encoder_dim, gram_val_representation_dim),
            Dropout(dropout),
            torch.nn.Linear(gram_val_representation_dim, self.vocab.get_vocab_size("grammar_value_tags")))

    if lemma_representation_dim <= 0:
        self._lemma_output = torch.nn.Linear(encoder_dim, len(lemmatize_helper))
    else:
        self._lemma_output = torch.nn.Sequential(
            Dropout(dropout),
            torch.nn.Linear(encoder_dim, lemma_representation_dim),
            Dropout(dropout),
            torch.nn.Linear(lemma_representation_dim, len(lemmatize_helper)))

    representation_dim = text_field_embedder.get_output_dim() + morpho_vector_dim
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()

    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")

    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")

    self._attachment_scores = AttachmentScores()
    self._gram_val_prediction_accuracy = CategoricalAccuracy()
    self._lemma_prediction_accuracy = CategoricalAccuracy()

    initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, phrase_layer: Seq2SeqEncoder, residual_encoder: Seq2SeqEncoder, span_start_encoder: Seq2SeqEncoder, span_end_encoder: Seq2SeqEncoder, initializer: InitializerApplicator, dropout: float = 0.2, num_context_answers: int = 0, marker_embedding_dim: int = 10, max_span_length: int = 30, max_turn_length: int = 12) -> None: super().__init__(vocab) self._num_context_answers = num_context_answers self._max_span_length = max_span_length self._text_field_embedder = text_field_embedder self._phrase_layer = phrase_layer self._marker_embedding_dim = marker_embedding_dim self._encoding_dim = phrase_layer.get_output_dim() self._matrix_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y') self._merge_atten = TimeDistributed(torch.nn.Linear(self._encoding_dim * 4, self._encoding_dim)) self._residual_encoder = residual_encoder if num_context_answers > 0: self._question_num_marker = torch.nn.Embedding(max_turn_length, marker_embedding_dim * num_context_answers) self._prev_ans_marker = torch.nn.Embedding((num_context_answers * 4) + 1, marker_embedding_dim) self._self_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y') self._followup_lin = torch.nn.Linear(self._encoding_dim, 3) self._merge_self_attention = TimeDistributed(torch.nn.Linear(self._encoding_dim * 3, self._encoding_dim)) self._span_start_encoder = span_start_encoder self._span_end_encoder = span_end_encoder self._span_start_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1)) self._span_end_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1)) self._span_yesno_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 3)) self._span_followup_predictor = TimeDistributed(self._followup_lin) check_dimensions_match(phrase_layer.get_input_dim(), text_field_embedder.get_output_dim() + marker_embedding_dim * num_context_answers, "phrase layer input dim", "embedding dim + marker dim * num context answers") initializer(self) self._span_start_accuracy = CategoricalAccuracy() self._span_end_accuracy = CategoricalAccuracy() self._span_yesno_accuracy = CategoricalAccuracy() self._span_followup_accuracy = CategoricalAccuracy() self._span_gt_yesno_accuracy = CategoricalAccuracy() self._span_gt_followup_accuracy = CategoricalAccuracy() self._span_accuracy = BooleanAccuracy() self._official_f1 = Average() self._variational_dropout = InputVariationalDropout(dropout)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, ud_tag_field_embedder: TextFieldEmbedder, ud_label_field_embedder: TextFieldEmbedder, sequence_encoder: Seq2SeqEncoder, tree_encoder_output_dim: int, tag_representation_dim: int, arc_representation_dim: int, tag_feedforward: FeedForward = None, arc_feedforward: FeedForward = None, dropout: float = 0.5, input_dropout: float = 0.5, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, head_tag_temperature: Optional[float] = None, head_temperature: Optional[float] = None) -> None: super().__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.ud_tag_field_embedder = ud_tag_field_embedder self.ud_label_field_embedder = ud_label_field_embedder self.sequence_encoder = sequence_encoder embed_dim = sequence_encoder.get_output_dim( ) + ud_label_field_embedder.get_output_dim() self.tree_encoder = BidirectionalTreeLSTMEncoder( embed_dim, tree_encoder_output_dim, dropout) feedforward_input_dim = tree_encoder_output_dim self.head_arc_feedforward = \ arc_feedforward or FeedForward(feedforward_input_dim, 1, arc_representation_dim, Activation.by_name("elu")(), dropout=dropout) self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward) self.arc_attention = BilinearMatrixAttention(arc_representation_dim, arc_representation_dim, use_input_biases=True) num_labels = self.vocab.get_vocab_size("head_tags") self.head_tag_feedforward = \ tag_feedforward or FeedForward(feedforward_input_dim, 1, tag_representation_dim, Activation.by_name("elu")(), dropout=dropout) self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward) self.tag_bilinear = BilinearWithBias(tag_representation_dim, tag_representation_dim, num_labels) self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, embed_dim])) representation_dim = text_field_embedder.get_output_dim( ) + self.ud_tag_field_embedder.get_output_dim() check_dimensions_match(representation_dim, sequence_encoder.get_input_dim(), "text field embedding dim", "sequence encoder input dim") check_dimensions_match(embed_dim, self.tree_encoder.get_input_dim(), "sequence encoder output dim", "tree encoder input dim") check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(), "tag representation dim", "tag feedforward output dim") check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(), "arc representation dim", "arc feedforward output dim") self._dropout = Dropout(dropout) self._input_dropout = InputVariationalDropout(input_dropout) self._attachment_scores = CategoricalAccuracy() self._tagging_accuracy = CategoricalAccuracy() self.head_tag_temperature = head_tag_temperature self.head_temperature = head_temperature initializer(self)
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, label_namespace: str = "labels", feedforward: Optional[FeedForward] = None, label_encoding: Optional[str] = None, include_start_end_transitions: bool = True, constrain_crf_decoding: bool = None, calculate_span_f1: bool = None, dropout: Optional[float] = None, verbose_metrics: bool = False, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, ) -> None: super().__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self._verbose_metrics = verbose_metrics if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self._feedforward = feedforward if feedforward is not None: output_dim = feedforward.get_output_dim() else: output_dim = self.encoder.get_output_dim() self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags)) # if constrain_crf_decoding and calculate_span_f1 are not # provided, (i.e., they're None), set them to True # if label_encoding is provided and False if it isn't. if constrain_crf_decoding is None: constrain_crf_decoding = label_encoding is not None if calculate_span_f1 is None: calculate_span_f1 = label_encoding is not None self.label_encoding = label_encoding if constrain_crf_decoding: if not label_encoding: raise ConfigurationError( "constrain_crf_decoding is True, but " "no label_encoding was specified." ) labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(label_encoding, labels) else: constraints = None self.include_start_end_transitions = include_start_end_transitions self.crf = ConditionalRandomField( self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions ) self.metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3), } self.calculate_span_f1 = calculate_span_f1 if calculate_span_f1: if not label_encoding: raise ConfigurationError( "calculate_span_f1 is True, but " "no label_encoding was specified." ) self._f1_metric = SpanBasedF1Measure( vocab, tag_namespace=label_namespace, label_encoding=label_encoding ) check_dimensions_match( text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim", ) if feedforward is not None: check_dimensions_match( encoder.get_output_dim(), feedforward.get_input_dim(), "encoder output dim", "feedforward input dim", ) initializer(self)
def __init__(self, vocab: Vocabulary, sh_hierarchy_dir: str, text_field_embedder: TextFieldEmbedder, abstract_text_encoder: Seq2SeqEncoder, attention_encoder: AttentionEncoder, local_globel_tradeoff: float = 0.5, bce_pos_weight: int = 10, use_positional_encoding: bool = False, child_parent_index_pair_dir: str = None, hv_penalty_lambda: float = 0.1, hidden_states_dropout: float = 0.1, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(EtdHMCNHierarchicalAttention, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder # self.num_classes = self.vocab.get_vocab_size("labels") self.abstract_text_encoder = abstract_text_encoder # self.attention_encoder = attention_encoder self.local_globel_tradeoff = local_globel_tradeoff self.use_positional_encoding = use_positional_encoding with open(sh_hierarchy_dir, 'r') as f: sh_hierarchy = json.load(f) # Use same dimension of encoders as HMCN dimension self.num_hierarchy_level = len(sh_hierarchy) self.attention_encoders = [attention_encoder] for i in range(self.num_hierarchy_level - 1): self.attention_encoders.append(deepcopy(attention_encoder)) self.attention_encoders = torch.nn.ModuleList(self.attention_encoders) self.HMCN_recurrent = HMCNRecurrent( [len(l) for _, l in sh_hierarchy.items()], attention_encoder.get_output_dim(), attention_encoder.get_output_dim(), hidden_states_dropout=hidden_states_dropout) if text_field_embedder.get_output_dim( ) != abstract_text_encoder.get_input_dim(): raise ConfigurationError( "The output dimension of the text_field_embedder must match the " "input dimension of the abstract_text_encoder. Found {} and {}, " "respectively.".format(text_field_embedder.get_output_dim(), abstract_text_encoder.get_input_dim())) self.metrics = { # "roc_auc_score": RocAucScore() "hit_5": HitAtK(5), "hit_10": HitAtK(10) # "precision_5": PrecisionAtK(5), # "precision_10": PrecisionAtK(10) # "hit_100": HitAtK(100), # "macro_measure": MacroF1Measure(top_k=5,num_label=self.num_classes) } if child_parent_index_pair_dir: child_parent_pairs = [] with open(child_parent_index_pair_dir, 'r') as f: for l in f.readlines(): pair = l.strip().split(',') child_parent_pairs.append((int(pair[0]), int(pair[1]))) childs_idx, parents_idx = map(list, zip(*child_parent_pairs)) self.loss = HMCNLoss( num_classes=[len(l) for _, l in sh_hierarchy.items()], bce_pos_weight=bce_pos_weight, childs_idx=childs_idx, parents_idx=parents_idx, penalty_lambda=hv_penalty_lambda) else: self.loss = HMCNLoss( num_classes=[len(l) for _, l in sh_hierarchy.items()], bce_pos_weight=bce_pos_weight) # self.loss = torch.nn.BCEWithLogitsLoss(pos_weight = torch.ones(self.num_classes)*bce_pos_weight) initializer(self)
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, tag_representation_dim: int, arc_representation_dim: int, tag_feedforward: FeedForward = None, arc_feedforward: FeedForward = None, lemma_tag_embedding: Embedding = None, upos_tag_embedding: Embedding = None, xpos_tag_embedding: Embedding = None, feats_tag_embedding: Embedding = None, dropout: float = 0.0, input_dropout: float = 0.0, edge_prediction_threshold: float = 0.5, initializer: InitializerApplicator = InitializerApplicator(), **kwargs, ) -> None: super().__init__(vocab, **kwargs) self.text_field_embedder = text_field_embedder self.encoder = encoder self.edge_prediction_threshold = edge_prediction_threshold if not 0 < edge_prediction_threshold < 1: raise ConfigurationError(f"edge_prediction_threshold must be between " f"0 and 1 (exclusive) but found {edge_prediction_threshold}.") encoder_dim = encoder.get_output_dim() self.head_arc_feedforward = arc_feedforward or FeedForward( encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")() ) self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward) self.arc_attention = BilinearMatrixAttention( arc_representation_dim, arc_representation_dim, use_input_biases=True ) num_labels = self.vocab.get_vocab_size("deps") self.head_tag_feedforward = tag_feedforward or FeedForward( encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")() ) self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward) self.tag_bilinear = BilinearMatrixAttention( tag_representation_dim, tag_representation_dim, label_dim=num_labels ) self._lemma_tag_embedding = lemma_tag_embedding or None self._upos_tag_embedding = upos_tag_embedding or None self._xpos_tag_embedding = xpos_tag_embedding or None self._feats_tag_embedding = feats_tag_embedding or None self._dropout = InputVariationalDropout(dropout) self._input_dropout = Dropout(input_dropout) # add a head sentinel to accommodate for extra root token in EUD graphs self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()])) representation_dim = text_field_embedder.get_output_dim() if lemma_tag_embedding is not None: representation_dim += lemma_tag_embedding.get_output_dim() if upos_tag_embedding is not None: representation_dim += upos_tag_embedding.get_output_dim() if xpos_tag_embedding is not None: representation_dim += xpos_tag_embedding.get_output_dim() if feats_tag_embedding is not None: representation_dim += feats_tag_embedding.get_output_dim() check_dimensions_match( representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim", ) check_dimensions_match( tag_representation_dim, self.head_tag_feedforward.get_output_dim(), "tag representation dim", "tag feedforward output dim", ) check_dimensions_match( arc_representation_dim, self.head_arc_feedforward.get_output_dim(), "arc representation dim", "arc feedforward output dim", ) self._enhanced_attachment_scores = EnhancedAttachmentScores() self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none") self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none") initializer(self)
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, tag_representation_dim: int, arc_representation_dim: int, tag_feedforward: FeedForward = None, arc_feedforward: FeedForward = None, pos_tag_embedding: Embedding = None, use_mst_decoding_for_validation: bool = True, dropout: float = 0.0, input_dropout: float = 0.0, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, ) -> None: super().__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.encoder = encoder encoder_dim = encoder.get_output_dim() self.head_arc_feedforward = arc_feedforward or FeedForward( encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")()) self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward) self.arc_attention = BilinearMatrixAttention(arc_representation_dim, arc_representation_dim, use_input_biases=True) num_labels = self.vocab.get_vocab_size("head_tags") self.head_tag_feedforward = tag_feedforward or FeedForward( encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")()) self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward) self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim, tag_representation_dim, num_labels) self._pos_tag_embedding = pos_tag_embedding or None self._dropout = InputVariationalDropout(dropout) self._input_dropout = Dropout(input_dropout) self._head_sentinel = torch.nn.Parameter( torch.randn([1, 1, encoder.get_output_dim()])) representation_dim = text_field_embedder.get_output_dim() if pos_tag_embedding is not None: representation_dim += pos_tag_embedding.get_output_dim() check_dimensions_match( representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim", ) check_dimensions_match( tag_representation_dim, self.head_tag_feedforward.get_output_dim(), "tag representation dim", "tag feedforward output dim", ) check_dimensions_match( arc_representation_dim, self.head_arc_feedforward.get_output_dim(), "arc representation dim", "arc feedforward output dim", ) self.use_mst_decoding_for_validation = use_mst_decoding_for_validation tags = self.vocab.get_token_to_index_vocabulary("pos") punctuation_tag_indices = { tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE } self._pos_to_ignore = set(punctuation_tag_indices.values()) logger.info( f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. " "Ignoring words with these POS tags for evaluation.") self._attachment_scores = AttachmentScores() initializer(self)
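# Example (added for illustration, not part of the original model code): a small sketch of what
# the default head/child arc feedforwards and the biaffine arc scorer constructed above amount to,
# assuming AllenNLP's FeedForward, BilinearMatrixAttention and Activation classes; all dimensions
# and the random input are illustrative only.
import torch
from allennlp.modules import FeedForward
from allennlp.modules.matrix_attention import BilinearMatrixAttention
from allennlp.nn import Activation

encoder_dim, arc_dim = 400, 500
head_arc = FeedForward(encoder_dim, 1, arc_dim, Activation.by_name("elu")())
child_arc = FeedForward(encoder_dim, 1, arc_dim, Activation.by_name("elu")())
arc_scorer = BilinearMatrixAttention(arc_dim, arc_dim, use_input_biases=True)

encoded = torch.randn(2, 7, encoder_dim)                    # (batch, sequence, encoder_dim)
arc_scores = arc_scorer(head_arc(encoded), child_arc(encoded))  # (batch, sequence, sequence)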
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BidirectionalAttentionFlowBasic, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(),
                                                  num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))

    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(),
                           "text field embedder output dim", "phrase layer input dim")
    check_dimensions_match(span_end_encoder.get_input_dim(), 4 * encoding_dim + 3 * modeling_dim,
                           "span end encoder input dim", "4 * encoding dim + 3 * modeling dim")

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    # evaluation

    # BLEU
    self._bleu_score_types_to_use = ["BLEU1", "BLEU2", "BLEU3", "BLEU4"]
    self._bleu_scores = {x: Average() for x in self._bleu_score_types_to_use}

    # ROUGE using pyrouge
    self._rouge_score_types_to_use = ['rouge-n', 'rouge-l', 'rouge-w']

    # if we have rouge-n as metric we actually get n scores like rouge-1, rouge-2, .., rouge-n
    max_rouge_n = 4
    rouge_n_metrics = []
    if "rouge-n" in self._rouge_score_types_to_use:
        rouge_n_metrics = ["rouge-{0}".format(x) for x in range(1, max_rouge_n + 1)]

    rouge_scores_names = rouge_n_metrics + [y for y in self._rouge_score_types_to_use if y != 'rouge-n']
    self._rouge_scores = {x: Average() for x in rouge_scores_names}
    self._rouge_evaluator = rouge.Rouge(metrics=self._rouge_score_types_to_use,
                                        max_n=max_rouge_n,
                                        limit_length=True,
                                        length_limit=100,
                                        length_limit_type='words',
                                        apply_avg=False,
                                        apply_best=False,
                                        alpha=0.5,  # Default F1_score
                                        weight_factor=1.2,
                                        stemming=True)

    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             matrix_attention_layer: MatrixAttention,
             modeling_layer: Seq2SeqEncoder,
             dropout_prob: float = 0.1,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)

    text_embed_dim = text_field_embedder.get_output_dim()
    encoding_in_dim = phrase_layer.get_input_dim()
    encoding_out_dim = phrase_layer.get_output_dim()
    modeling_in_dim = modeling_layer.get_input_dim()
    modeling_out_dim = modeling_layer.get_output_dim()

    self._text_field_embedder = text_field_embedder

    self._embedding_proj_layer = torch.nn.Linear(text_embed_dim, encoding_in_dim)
    self._highway_layer = Highway(encoding_in_dim, num_highway_layers)
    self._encoding_proj_layer = torch.nn.Linear(encoding_in_dim, encoding_in_dim)
    self._phrase_layer = phrase_layer

    self._matrix_attention = matrix_attention_layer

    self._modeling_proj_layer = torch.nn.Linear(encoding_out_dim * 4, modeling_in_dim)
    self._modeling_layer = modeling_layer

    self._span_start_predictor = torch.nn.Linear(modeling_out_dim * 2, 1)
    self._span_end_predictor = torch.nn.Linear(modeling_out_dim * 2, 1)

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._metrics = SquadEmAndF1()
    self._dropout = torch.nn.Dropout(p=dropout_prob) if dropout_prob > 0 else lambda x: x

    # evaluation

    # BLEU
    self._bleu_score_types_to_use = ["BLEU1", "BLEU2", "BLEU3", "BLEU4"]
    self._bleu_scores = {x: Average() for x in self._bleu_score_types_to_use}

    # ROUGE using pyrouge
    self._rouge_score_types_to_use = ['rouge-n', 'rouge-l', 'rouge-w']

    # if we have rouge-n as metric we actually get n scores like rouge-1, rouge-2, .., rouge-n
    max_rouge_n = 4
    rouge_n_metrics = []
    if "rouge-n" in self._rouge_score_types_to_use:
        rouge_n_metrics = ["rouge-{0}".format(x) for x in range(1, max_rouge_n + 1)]

    rouge_scores_names = rouge_n_metrics + [y for y in self._rouge_score_types_to_use if y != 'rouge-n']
    self._rouge_scores = {x: Average() for x in rouge_scores_names}
    self._rouge_evaluator = rouge.Rouge(metrics=self._rouge_score_types_to_use,
                                        max_n=max_rouge_n,
                                        limit_length=True,
                                        length_limit=100,
                                        length_limit_type='words',
                                        apply_avg=False,
                                        apply_best=False,
                                        alpha=0.5,  # Default F1_score
                                        weight_factor=1.2,
                                        stemming=True)

    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder = None,
             label_namespace: str = "labels",
             feedforward: Optional[FeedForward] = None,
             include_start_end_transitions: bool = True,
             dropout: Optional[float] = None,
             use_upos_constraints: bool = True,
             use_lemma_constraints: bool = True,
             train_with_constraints: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.train_with_constraints = train_with_constraints

    self.encoder = encoder
    if self.encoder is not None:
        encoder_output_dim = self.encoder.get_output_dim()
    else:
        encoder_output_dim = self.text_field_embedder.get_output_dim()
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self.feedforward = feedforward
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = encoder_output_dim
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags))

    self._label_namespace = label_namespace
    labels = self.vocab.get_index_to_token_vocabulary(self._label_namespace)
    constraints = streusle_allowed_transitions(labels)

    self.use_upos_constraints = use_upos_constraints
    self.use_lemma_constraints = use_lemma_constraints

    if self.use_lemma_constraints and not self.use_upos_constraints:
        raise ConfigurationError("If lemma constraints are applied, UPOS constraints must be applied as well.")

    if self.use_upos_constraints:
        # Get a dict with a mapping from UPOS to allowed LEXCAT here.
        self._upos_to_allowed_lexcats: Dict[str, Set[str]] = get_upos_allowed_lexcats(
            stronger_constraints=self.use_lemma_constraints)

        # Dict with a mapping from lemma to a dictionary of [UPOS, list of additionally allowed LEXCATs].
        self._lemma_to_allowed_lexcats: Dict[str, Dict[str, List[str]]] = get_lemma_allowed_lexcats()

        # Use labels and the upos_to_allowed_lexcats to get a dict with
        # a mapping from UPOS to a mask with 1 at allowed label indices and 0 at
        # disallowed label indices.
        self._upos_to_label_mask: Dict[str, torch.Tensor] = {}
        for upos in ALL_UPOS:
            # Shape: (num_labels,)
            upos_label_mask = torch.zeros(len(labels),
                                          device=next(self.tag_projection_layer.parameters()).device)
            # Go through the labels and indices and fill in the values that are allowed.
            for label_index, label in labels.items():
                if len(label.split("-")) == 1:
                    upos_label_mask[label_index] = 1
                    continue
                label_lexcat = label.split("-")[1]
                if not label.startswith("O-") and not label.startswith("o-"):
                    # Label does not start with O-/o-, always allowed.
                    upos_label_mask[label_index] = 1
                elif label_lexcat in self._upos_to_allowed_lexcats[upos]:
                    # Label starts with O-/o-, but the lexcat is in allowed
                    # lexcats for the current upos.
                    upos_label_mask[label_index] = 1
            self._upos_to_label_mask[upos] = upos_label_mask

        # Use labels and the lemma_to_allowed_lexcats to get a dict with
        # a mapping from lemma to a mask with 1 at an _additionally_ allowed label index
        # and 0 at disallowed label indices. If lemma_to_label_mask has a 0, and upos_to_label_mask
        # has a 0, the lexcat is not allowed for the (upos, lemma). If either lemma_to_label_mask or
        # upos_to_label_mask has a 1, the lexcat is allowed for the (upos, lemma) pair.
        self._lemma_upos_to_label_mask: Dict[Tuple[str, str], torch.Tensor] = {}
        for lemma in SPECIAL_LEMMAS:
            for upos_tag in ALL_UPOS:
                # No additional constraints, should be all zero
                if upos_tag not in self._lemma_to_allowed_lexcats[lemma]:
                    continue
                # Shape: (num_labels,)
                lemma_upos_label_mask = torch.zeros(len(labels),
                                                    device=next(self.tag_projection_layer.parameters()).device)
                # Go through the labels and indices and fill in the values that are allowed.
                for label_index, label in labels.items():
                    # For ~i, etc. tags. We don't deal with them here.
                    if len(label.split("-")) == 1:
                        continue
                    label_lexcat = label.split("-")[1]
                    if not label.startswith("O-") and not label.startswith("o-"):
                        # Label does not start with O-/o-, so we don't deal with it here.
                        continue
                    if label_lexcat in self._lemma_to_allowed_lexcats[lemma][upos_tag]:
                        # Label starts with O-/o-, but the lexcat is in allowed
                        # lexcats for the current upos.
                        lemma_upos_label_mask[label_index] = 1
                self._lemma_upos_to_label_mask[(lemma, upos_tag)] = lemma_upos_label_mask

    self.include_start_end_transitions = include_start_end_transitions
    self.crf = ConditionalRandomField(
        self.num_tags, constraints,
        include_start_end_transitions=include_start_end_transitions)

    self.accuracy_metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    self.streuseval_metric = Streuseval()
    if encoder is not None:
        check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
    if feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(),
                               "encoder output dim", "feedforward input dim")
    initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, num_highway_layers: int, phrase_layer: Seq2SeqEncoder, attention_similarity_function: SimilarityFunction, modeling_layer: Seq2SeqEncoder, span_end_encoder: Seq2SeqEncoder, dropout: float = 0.2, mask_lstms: bool = True, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._highway_layer = TimeDistributed( Highway(text_field_embedder.get_output_dim(), num_highway_layers)) self._phrase_layer = phrase_layer self._matrix_attention = MatrixAttention(attention_similarity_function) self._modeling_layer = modeling_layer self._span_end_encoder = span_end_encoder encoding_dim = phrase_layer.get_output_dim() modeling_dim = modeling_layer.get_output_dim() span_start_input_dim = encoding_dim * 4 + modeling_dim self._span_start_predictor = TimeDistributed( torch.nn.Linear(span_start_input_dim, 1)) span_end_encoding_dim = span_end_encoder.get_output_dim() span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim self._span_end_predictor = TimeDistributed( torch.nn.Linear(span_end_input_dim, 1)) # Bidaf has lots of layer dimensions which need to match up - these # aren't necessarily obvious from the configuration files, so we check # here. if modeling_layer.get_input_dim() != 4 * encoding_dim: raise ConfigurationError( "The input dimension to the modeling_layer must be " "equal to 4 times the encoding dimension of the phrase_layer. " "Found {} and 4 * {} respectively.".format( modeling_layer.get_input_dim(), encoding_dim)) if text_field_embedder.get_output_dim() != phrase_layer.get_input_dim( ): raise ConfigurationError( "The output dimension of the text_field_embedder (embedding_dim + " "char_cnn) must match the input dimension of the phrase_encoder. " "Found {} and {}, respectively.".format( text_field_embedder.get_output_dim(), phrase_layer.get_input_dim())) if span_end_encoder.get_input_dim( ) != encoding_dim * 4 + modeling_dim * 3: raise ConfigurationError( "The input dimension of the span_end_encoder should be equal to " "4 * phrase_layer.output_dim + 3 * modeling_layer.output_dim. " "Found {} and (4 * {} + 3 * {}) " "respectively.".format(span_end_encoder.get_input_dim(), encoding_dim, modeling_dim)) self._span_start_accuracy = CategoricalAccuracy() self._span_end_accuracy = CategoricalAccuracy() self._span_accuracy = BooleanAccuracy() self._squad_metrics = SquadEmAndF1() if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._mask_lstms = mask_lstms initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, similarity_function: SimilarityFunction, projection_feedforward: FeedForward, inference_encoder: Seq2SeqEncoder, output_feedforward: FeedForward, output_logit: FeedForward, parser_model_path: str, parser_cuda_device: int, freeze_parser: bool, dropout: float = 0.5, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._encoder = encoder self._matrix_attention = LegacyMatrixAttention(similarity_function) self._projection_feedforward = projection_feedforward self._inference_encoder = inference_encoder if dropout: self.dropout = torch.nn.Dropout(dropout) self.rnn_input_dropout = InputVariationalDropout(dropout) else: self.dropout = None self.rnn_input_dropout = None self._output_feedforward = output_feedforward self._output_logit = output_logit self._num_labels = vocab.get_vocab_size(namespace="labels") check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(), "encoder output dim", "projection feedforward input") check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(), "proj feedforward output dim", "inference lstm input dim") self._accuracy = CategoricalAccuracy() self._loss = torch.nn.CrossEntropyLoss() self._parser = load_archive(parser_model_path, cuda_device=parser_cuda_device).model self._parser._head_sentinel.requires_grad = False for child in self._parser.children(): for param in child.parameters(): param.requires_grad = False if not freeze_parser: for param in self._parser.encoder.parameters(): param.requires_grad = True initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    similarity_function: SimilarityFunction,
    projection_feedforward: FeedForward,
    inference_encoder: Seq2SeqEncoder,
    output_feedforward: FeedForward,
    output_logit: FeedForward,
    dropout: float = 0.5,
    class_weights: list = [],
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
    encode_together: bool = False,
) -> None:
    super().__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._encoder = encoder
    self.encode_together = encode_together

    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._projection_feedforward = projection_feedforward

    self._inference_encoder = inference_encoder

    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
        self.rnn_input_dropout = InputVariationalDropout(dropout)
    else:
        self.dropout = None
        self.rnn_input_dropout = None

    self._output_feedforward = output_feedforward
    self._output_logit = output_logit

    self._num_labels = vocab.get_vocab_size(namespace="labels")

    if class_weights:
        self.class_weights = class_weights
    else:
        # Default to a uniform weight of 1.0 per label.
        self.class_weights = [1.0] * self._num_labels

    check_dimensions_match(
        text_field_embedder.get_output_dim(),
        encoder.get_input_dim(),
        "text field embedding dim",
        "encoder input dim",
    )
    check_dimensions_match(
        encoder.get_output_dim() * 4,
        projection_feedforward.get_input_dim(),
        "encoder output dim",
        "projection feedforward input",
    )
    check_dimensions_match(
        projection_feedforward.get_output_dim(),
        inference_encoder.get_input_dim(),
        "proj feedforward output dim",
        "inference lstm input dim",
    )

    self.metrics = {"accuracy": CategoricalAccuracy()}
    for _class in range(len(self.class_weights)):
        self.metrics.update({
            f"f1_rel{_class}": F1Measure(_class),
        })

    self._loss = torch.nn.CrossEntropyLoss(weight=torch.FloatTensor(self.class_weights))

    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_embedder: TextFieldEmbedder,
    definition_encoder: Seq2SeqEncoder,
    definition_decoder: FeedForward,
    definition_feedforward: FeedForward = None,
    definition_pooling: str = 'last',
    definition_namespace: str = 'definition',
    word_namespace: str = 'word',
    alpha: float = 1.0,
    beta: float = 8.0,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)

    self.definition_namespace = definition_namespace
    self.word_namespace = word_namespace
    self.definition_vocab_size = self.vocab.get_vocab_size(namespace=self.definition_namespace)
    self._oov_index = self.vocab.get_token_index(self.vocab._oov_token, self.definition_namespace)
    self.limited_word_vocab_size = None
    self.alpha = alpha
    self.beta = beta
    self.eps = 10e-8

    logger.info(f'Definition vocab size: {self.vocab.get_vocab_size(namespace=self.definition_namespace)}')
    logger.info(f'Word vocab size: {self.vocab.get_vocab_size(namespace=self.word_namespace)}')
    logger.info('Intersection vocab size: {}'.format(
        len(set(self.vocab._token_to_index[self.definition_namespace].keys()).intersection(
            set(self.vocab._token_to_index[self.word_namespace].keys())))))

    # TODO: check text_embedder
    self.text_embedder = text_embedder
    self.definition_encoder = definition_encoder
    self.definition_decoder = definition_decoder
    self.definition_pooling = definition_pooling
    if definition_feedforward is not None:
        self.definition_feedforward = definition_feedforward
    else:
        self.definition_feedforward = lambda x: x

    if self.definition_pooling == 'self-attentive':
        self.self_attentive_pooling_projection = nn.Linear(self.definition_encoder.get_output_dim(), 1)

    # checks
    check_dimensions_match(text_embedder.get_output_dim(), definition_encoder.get_input_dim(),
                           'emb_dim', 'encoder_input_dim')
    if self.definition_decoder.get_output_dim() > self.vocab.get_vocab_size(definition_namespace):
        raise ConfigurationError(
            f'Decoder output dim ({self.definition_decoder.get_output_dim()}) is larger than '
            f'vocabulary size ({self.vocab.get_vocab_size(definition_namespace)}).')
    if self.definition_decoder.get_output_dim() < self.vocab.get_vocab_size(definition_namespace):
        self.limited_word_vocab_size = self.definition_decoder.get_output_dim()

    # self.pdist = nn.PairwiseDistance(p=2)
    self.pdist = lambda x, y: torch.mean((x - y) ** 2, dim=1)
    self.metrics = {'consistency_loss': EuclideanDistance()}

    initializer(self)
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, use_attention: bool = False, use_positional_encoding: bool = False, label_namespace: str = "labels", feedforward: Optional[FeedForward] = None, label_encoding: Optional[str] = None, include_start_end_transitions: bool = True, has_mode: bool = False, constrain_crf_decoding: bool = None, calculate_span_f1: bool = None, calculate_relation_f1: bool = False, dropout: Optional[float] = None, verbose_metrics: bool = False, initializer: InitializerApplicator = InitializerApplicator(), top_k: int = 1, max_relation_width:int = 11, **kwargs, ) -> None: super().__init__(vocab, **kwargs) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self.top_k = top_k self._verbose_metrics = verbose_metrics self.use_attention = use_attention self.use_positional_encoding = use_positional_encoding self._sample_probability = compounding(0.1, 1.0, 0.99) self.has_mode = has_mode if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self._feedforward = feedforward if feedforward is not None: output_dim = feedforward.get_output_dim() else: output_dim = self.encoder.get_output_dim() self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags)) if self.use_attention: self._attention = SelfAttentionGRU( output_dim, embedding_size=encoder.get_output_dim(), rnn_hidden_size=encoder.get_output_dim(), bos_index=self.vocab.get_token_index("O", label_namespace) ) if self.use_positional_encoding: self.positional_encoding = PositionalEncoding(d_model=encoder.get_output_dim(),dropout=dropout) # if constrain_crf_decoding and calculate_span_f1 are not # provided, (i.e., they're None), set them to True # if label_encoding is provided and False if it isn't. if constrain_crf_decoding is None: constrain_crf_decoding = label_encoding is not None if calculate_span_f1 is None: calculate_span_f1 = label_encoding is not None self.label_encoding = label_encoding if constrain_crf_decoding: if not label_encoding: raise ConfigurationError( "constrain_crf_decoding is True, but no label_encoding was specified." ) labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(label_encoding, labels) else: constraints = None self.include_start_end_transitions = include_start_end_transitions self.crf = ConditionalRandomField( self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions ) self.metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3), } self.calculate_span_f1 = calculate_span_f1 if calculate_span_f1: if not label_encoding: raise ConfigurationError( "calculate_span_f1 is True, but no label_encoding was specified." ) self._f1_metric = SpanBasedF1Measure( vocab, tag_namespace=label_namespace, label_encoding=label_encoding ) self.calculate_relation_f1 = calculate_relation_f1 if calculate_relation_f1: self._relation_f1_metric = RelationMetric( vocab, tag_namespace=label_namespace, label_encoding=label_encoding, has_mode=has_mode, max_relation_width=max_relation_width ) check_dimensions_match( text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim", ) if feedforward is not None: check_dimensions_match( encoder.get_output_dim(), feedforward.get_input_dim(), "encoder output dim", "feedforward input dim", ) self.j = 0 initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    tag_representation_dim: int,
    arc_representation_dim: int,
    activation=Activation.by_name("tanh")(),
    lemma_tag_embedding: Embedding = None,
    upos_tag_embedding: Embedding = None,
    xpos_tag_embedding: Embedding = None,
    feats_tag_embedding: Embedding = None,
    dropout: float = 0.0,
    input_dropout: float = 0.0,
    edge_prediction_threshold: float = 0.5,
    initializer: InitializerApplicator = InitializerApplicator(),
    **kwargs,
) -> None:
    super().__init__(vocab, **kwargs)

    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.activation = activation
    self.edge_prediction_threshold = edge_prediction_threshold
    if not 0 < edge_prediction_threshold < 1:
        raise ConfigurationError(f"edge_prediction_threshold must be between "
                                 f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")

    encoder_dim = encoder.get_output_dim()

    # These two matrices together form the feed forward network which takes the vectors of the
    # two words in question and makes predictions from that.
    # This is the trick described by Kiperwasser and Goldberg to make training faster.
    self.edge_head = Linear(encoder_dim, arc_representation_dim)
    self.edge_dep = Linear(encoder_dim, arc_representation_dim,
                           bias=False)  # bias is already added by edge_head

    self.tag_head = Linear(encoder_dim, tag_representation_dim)
    self.tag_dep = Linear(encoder_dim, tag_representation_dim, bias=False)

    num_labels = self.vocab.get_vocab_size("deps")

    self.arc_out_layer = Linear(arc_representation_dim, 1,
                                bias=False)  # no bias in output layer of K&G model
    # Output layer over the tag (label) representations.
    self.tag_out_layer = Linear(tag_representation_dim, num_labels)

    self._lemma_tag_embedding = lemma_tag_embedding or None
    self._upos_tag_embedding = upos_tag_embedding or None
    self._xpos_tag_embedding = xpos_tag_embedding or None
    self._feats_tag_embedding = feats_tag_embedding or None

    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)

    # add a head sentinel to accommodate for extra root token
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))

    representation_dim = text_field_embedder.get_output_dim()
    if lemma_tag_embedding is not None:
        representation_dim += lemma_tag_embedding.get_output_dim()
    if upos_tag_embedding is not None:
        representation_dim += upos_tag_embedding.get_output_dim()
    if xpos_tag_embedding is not None:
        representation_dim += xpos_tag_embedding.get_output_dim()
    if feats_tag_embedding is not None:
        representation_dim += feats_tag_embedding.get_output_dim()

    check_dimensions_match(
        representation_dim,
        encoder.get_input_dim(),
        "text field embedding dim",
        "encoder input dim",
    )

    self._enhanced_attachment_scores = EnhancedAttachmentScores()
    self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none")
    self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none")
    initializer(self)
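# Example (added for illustration, not part of the original model code): a tiny sketch of the
# Kiperwasser & Goldberg style arc scoring that edge_head/edge_dep + arc_out_layer above enable --
# project heads and dependents once, then score every (head, dep) pair from the sum of the two
# projections. How the actual forward pass combines them may differ; dimensions are illustrative,
# and tanh mirrors the constructor's default activation.
import torch

encoder_dim, arc_dim, seq_len = 8, 6, 5
edge_head = torch.nn.Linear(encoder_dim, arc_dim)
edge_dep = torch.nn.Linear(encoder_dim, arc_dim, bias=False)
arc_out = torch.nn.Linear(arc_dim, 1, bias=False)

encoded = torch.randn(1, seq_len, encoder_dim)
head_repr = edge_head(encoded).unsqueeze(2)    # (1, seq, 1, arc_dim)
dep_repr = edge_dep(encoded).unsqueeze(1)      # (1, 1, seq, arc_dim)
arc_scores = arc_out(torch.tanh(head_repr + dep_repr)).squeeze(-1)  # (1, seq, seq)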
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, intent_encoder: Seq2SeqEncoder = None, tag_encoder: Seq2SeqEncoder = None, attention: Attention = None, attention_function: SimilarityFunction = None, context_for_intent: bool = True, context_for_tag: bool = True, attention_for_intent: bool = True, attention_for_tag: bool = True, sequence_label_namespace: str = "labels", intent_label_namespace: str = "intent_labels", feedforward: Optional[FeedForward] = None, label_encoding: Optional[str] = None, include_start_end_transitions: bool = True, crf_decoding: bool = False, constrain_crf_decoding: bool = None, focal_loss_gamma: float = None, nongeneral_intent_weight: float = 5., num_train_examples: float = None, calculate_span_f1: bool = None, dropout: Optional[float] = None, verbose_metrics: bool = False, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.context_for_intent = context_for_intent self.context_for_tag = context_for_tag self.attention_for_intent = attention_for_intent self.attention_for_tag = attention_for_tag self.sequence_label_namespace = sequence_label_namespace self.intent_label_namespace = intent_label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(sequence_label_namespace) self.num_intents = self.vocab.get_vocab_size(intent_label_namespace) self.encoder = encoder self.intent_encoder = intent_encoder self.tag_encoder = tag_encoder self._feedforward = feedforward self._verbose_metrics = verbose_metrics self.rl = False if attention: if attention_function: raise ConfigurationError("You can only specify an attention module or an " "attention function, but not both.") self.attention = attention elif attention_function: self.attention = LegacyAttention(attention_function) if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None projection_input_dim = feedforward.get_output_dim() if self._feedforward else self.encoder.get_output_dim() if self.context_for_intent: projection_input_dim += self.encoder.get_output_dim() if self.attention_for_intent: projection_input_dim += self.encoder.get_output_dim() self.intent_projection_layer = Linear(projection_input_dim, self.num_intents) if num_train_examples: try: pos_weight = torch.tensor([log10((num_train_examples - self.vocab._retained_counter[intent_label_namespace][t]) / self.vocab._retained_counter[intent_label_namespace][t]) for i, t in self.vocab.get_index_to_token_vocabulary(intent_label_namespace).items()]) except Exception: pos_weight = torch.tensor([1. for i, t in self.vocab.get_index_to_token_vocabulary(intent_label_namespace).items()]) else: # pos_weight = torch.tensor([(lambda t: 1. if "general" in t else nongeneral_intent_weight)(t) for i, t in self.vocab.get_index_to_token_vocabulary(intent_label_namespace).items()])
if "general" in t else nongeneral_intent_weight)(t) for i, t in pos_weight = torch.tensor([(lambda t: nongeneral_intent_weight if "Request" in t else 1.)(t) for i, t in self.vocab.get_index_to_token_vocabulary(intent_label_namespace).items()]) self.intent_loss = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight, reduction="none") tag_projection_input_dim = feedforward.get_output_dim() if self._feedforward else self.encoder.get_output_dim() if self.context_for_tag: tag_projection_input_dim += self.encoder.get_output_dim() if self.attention_for_tag: tag_projection_input_dim += self.encoder.get_output_dim() self.tag_projection_layer = TimeDistributed(Linear(tag_projection_input_dim, self.num_tags)) # if constrain_crf_decoding and calculate_span_f1 are not # provided, (i.e., they're None), set them to True # if label_encoding is provided and False if it isn't. if constrain_crf_decoding is None: constrain_crf_decoding = label_encoding is not None if calculate_span_f1 is None: calculate_span_f1 = label_encoding is not None self.label_encoding = label_encoding if constrain_crf_decoding: if not label_encoding: raise ConfigurationError("constrain_crf_decoding is True, but " "no label_encoding was specified.") labels = self.vocab.get_index_to_token_vocabulary(sequence_label_namespace) constraints = allowed_transitions(label_encoding, labels) else: constraints = None self.include_start_end_transitions = include_start_end_transitions if crf_decoding: self.crf = ConditionalRandomField( self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions ) else: self.crf = None self._intent_f1_metric = MultiLabelF1Measure(vocab, namespace=intent_label_namespace) self.calculate_span_f1 = calculate_span_f1 if calculate_span_f1: if not label_encoding: raise ConfigurationError("calculate_span_f1 is True, but " "no label_encoding was specified.") self._f1_metric = SpanBasedF1Measure(vocab, tag_namespace=sequence_label_namespace, label_encoding=label_encoding) self._dai_f1_metric = DialogActItemF1Measure() check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") if feedforward is not None: check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(), "encoder output dim", "feedforward input dim") initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, label_namespace: str = "labels", constraint_type: str = None, feedforward: FeedForward = FeedForward( input_dim=66, num_layers=100, hidden_dims=64, activations=torch.nn.ReLU(), dropout=0.5), include_start_end_transitions: bool = True, dropout: float = None, verbose_metrics: bool = True, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self._verbose_metrics = verbose_metrics if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self._feedforward = feedforward if feedforward is not None: output_dim = feedforward.get_output_dim() else: output_dim = self.encoder.get_output_dim() self.tag_projection_layer = TimeDistributed( Linear(output_dim, self.num_tags)) if constraint_type is not None: labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(constraint_type, labels) else: constraints = None self.crf = ConditionalRandomField( self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions) self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace, label_encoding=constraint_type or "BIO") check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") if feedforward is not None: check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(), "encoder output dim", "feedforward input dim") initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, phrase_layer: Seq2SeqEncoder, residual_encoder: Seq2SeqEncoder, span_start_encoder: Seq2SeqEncoder, span_end_encoder: Seq2SeqEncoder, initializer: InitializerApplicator, dropout: float = 0.2, num_context_answers: int = 0, marker_embedding_dim: int = 10, max_span_length: int = 30) -> None: super().__init__(vocab) self._num_context_answers = num_context_answers self._max_span_length = max_span_length self._text_field_embedder = text_field_embedder self._phrase_layer = phrase_layer self._marker_embedding_dim = marker_embedding_dim self._encoding_dim = phrase_layer.get_output_dim() max_turn_length = 12 self._matrix_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y') self._merge_atten = TimeDistributed( torch.nn.Linear(self._encoding_dim * 4, self._encoding_dim)) self.t = TimeDistributed( torch.nn.Linear(self._encoding_dim * 2, self._encoding_dim)) self._residual_encoder = residual_encoder if num_context_answers > 0: self._question_num_marker = torch.nn.Embedding( max_turn_length, marker_embedding_dim * num_context_answers) self._prev_ans_marker = torch.nn.Embedding( (num_context_answers * 4) + 1, marker_embedding_dim) self._self_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y') self._followup_lin = torch.nn.Linear(self._encoding_dim, 3) self._merge_self_attention = TimeDistributed( torch.nn.Linear(self._encoding_dim * 3, self._encoding_dim)) self._span_start_encoder = span_start_encoder self._span_end_encoder = span_end_encoder self._span_start_predictor = TimeDistributed( torch.nn.Linear(self._encoding_dim, 1)) self._span_end_predictor = TimeDistributed( torch.nn.Linear(self._encoding_dim, 1)) self._span_yesno_predictor = TimeDistributed( torch.nn.Linear(self._encoding_dim, 3)) self._span_followup_predictor = TimeDistributed(self._followup_lin) check_dimensions_match( phrase_layer.get_input_dim(), text_field_embedder.get_output_dim() + marker_embedding_dim * num_context_answers, "phrase layer input dim", "embedding dim + marker dim * num context answers") initializer(self) self._span_start_accuracy = CategoricalAccuracy() self._span_end_accuracy = CategoricalAccuracy() self._span_yesno_accuracy = CategoricalAccuracy() self._span_followup_accuracy = CategoricalAccuracy() self._span_gt_yesno_accuracy = CategoricalAccuracy() self._span_gt_followup_accuracy = CategoricalAccuracy() self._span_accuracy = BooleanAccuracy() self._official_f1 = Average() self._variational_dropout = InputVariationalDropout(dropout)
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, num_highway_layers: int, phrase_layer: Seq2SeqEncoder, matrix_attention: MatrixAttention, modeling_layer: Seq2SeqEncoder, span_end_encoder: Seq2SeqEncoder, dropout: float = 0.2, mask_lstms: bool = True, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, ) -> None: super().__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._highway_layer = TimeDistributed( Highway(text_field_embedder.get_output_dim(), num_highway_layers)) self._phrase_layer = phrase_layer self._matrix_attention = matrix_attention self._modeling_layer = modeling_layer self._span_end_encoder = span_end_encoder encoding_dim = phrase_layer.get_output_dim() modeling_dim = modeling_layer.get_output_dim() span_start_input_dim = encoding_dim * 4 + modeling_dim self._span_start_predictor = TimeDistributed( torch.nn.Linear(span_start_input_dim, 1)) span_end_encoding_dim = span_end_encoder.get_output_dim() span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim self._span_end_predictor = TimeDistributed( torch.nn.Linear(span_end_input_dim, 1)) # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily # obvious from the configuration files, so we check here. check_dimensions_match( modeling_layer.get_input_dim(), 4 * encoding_dim, "modeling layer input dim", "4 * encoding dim", ) check_dimensions_match( text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(), "text field embedder output dim", "phrase layer input dim", ) check_dimensions_match( span_end_encoder.get_input_dim(), 4 * encoding_dim + 3 * modeling_dim, "span end encoder input dim", "4 * encoding dim + 3 * modeling dim", ) self._span_start_accuracy = CategoricalAccuracy() self._span_end_accuracy = CategoricalAccuracy() self._span_accuracy = BooleanAccuracy() self._squad_metrics = SquadEmAndF1() if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._mask_lstms = mask_lstms initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, tag_representation_dim: int, arc_representation_dim: int, tag_feedforward: FeedForward = None, arc_feedforward: FeedForward = None, pos_tag_embedding: Embedding = None, dropout: float = 0.0, input_dropout: float = 0.0, edge_prediction_threshold: float = 0.5, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(GraphParser, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.encoder = encoder self.edge_prediction_threshold = edge_prediction_threshold if not 0 < edge_prediction_threshold < 1: raise ConfigurationError(f"edge_prediction_threshold must be between " f"0 and 1 (exclusive) but found {edge_prediction_threshold}.") encoder_dim = encoder.get_output_dim() self.head_arc_feedforward = arc_feedforward or \ FeedForward(encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")()) self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward) self.arc_attention = BilinearMatrixAttention(arc_representation_dim, arc_representation_dim, use_input_biases=True) num_labels = self.vocab.get_vocab_size("labels") self.head_tag_feedforward = tag_feedforward or \ FeedForward(encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")()) self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward) self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim, tag_representation_dim, label_dim=num_labels) self._pos_tag_embedding = pos_tag_embedding or None self._dropout = InputVariationalDropout(dropout) self._input_dropout = Dropout(input_dropout) representation_dim = text_field_embedder.get_output_dim() if pos_tag_embedding is not None: representation_dim += pos_tag_embedding.get_output_dim() check_dimensions_match(representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim") check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(), "tag representation dim", "tag feedforward output dim") check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(), "arc representation dim", "arc feedforward output dim") self._unlabelled_f1 = F1Measure(positive_label=1) self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction='none') self._tag_loss = torch.nn.CrossEntropyLoss(reduction='none') initializer(self)
def __init__(self, vocab: Vocabulary, context_field_embedder: TextFieldEmbedder, context_encoder: Seq2SeqEncoder, target_encoding_pooling_function: str = 'mean', feedforward: Optional[FeedForward] = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, dropout: float = 0.0, label_name: str = 'target-sentiment-labels', loss_weights: Optional[List[float]] = None) -> None: super().__init__(vocab, regularizer) ''' :param vocab: A Vocabulary, required in order to compute sizes for input/output projections. :param context_field_embedder: Used to embed the context text. :param context_encoder: Encodes the context sentence/text. :param target_encoding_pooling_function: Pooling function to be used to create a representation for the target from the encoded context. This pooled representation will then be given to the optional FeedForward layer. This can be either `mean` for mean pooling or `max` for max pooling. If this is `max`, a `relu` function is applied before the pooling (this is to overcome the padding issue, where some vectors will be zero due to padding). :param feedforward: An optional feed forward layer to apply after the target encoding pooling function. :param initializer: Used to initialize the model parameters. :param regularizer: If provided, will be used to calculate the regularization penalty during training. :param dropout: To apply dropout after each layer apart from the last layer. All dropout that is applied to time-based data will be `variational dropout`_; all else will be standard dropout. :param label_name: Name of the label namespace. :param loss_weights: The amount of weight to give the negative, neutral, and positive classes respectively. e.g. [0.2, 0.5, 0.3] would weight the negative class by a factor of 0.2, neutral by 0.5 and positive by 0.3. NOTE: it assumes the sentiment labels are ordered as [negative, neutral, positive]. This model is based on the TD-BERT model (figure 2) of `Gao et al. 2019 <https://ieeexplore.ieee.org/abstract/document/8864964>`_. When `target_encoding_pooling_function` is `max` and the `context_field_embedder` is BERT, the model is identical to TD-BERT. 
''' self.label_name = label_name self.context_field_embedder = context_field_embedder self.context_encoder = context_encoder self.num_classes = self.vocab.get_vocab_size(self.label_name) self.feedforward = feedforward allowed_pooling_functions = ['max', 'mean'] if target_encoding_pooling_function not in allowed_pooling_functions: raise ValueError('Target Encoding Pooling function has to be one ' f'of: {allowed_pooling_functions} not: ' f'{target_encoding_pooling_function}') self.target_encoding_pooling_function = target_encoding_pooling_function self.mean_pooler = BagOfEmbeddingsEncoder(self.context_encoder.get_output_dim(), averaged=True) # Set the loss weights (have to sort them by order of label index in # the vocab) self.loss_weights = target_sentiment.util.loss_weight_order(self, loss_weights, self.label_name) if feedforward is not None: output_dim = self.feedforward.get_output_dim() else: output_dim = self.context_encoder.get_output_dim() self.label_projection = Linear(output_dim, self.num_classes) self.metrics = { "accuracy": CategoricalAccuracy() } self.f1_metrics = {} # F1 Scores label_index_name = self.vocab.get_index_to_token_vocabulary(self.label_name) for label_index, _label_name in label_index_name.items(): _label_name = f'F1_{_label_name.capitalize()}' self.f1_metrics[_label_name] = F1Measure(label_index) # Dropout self._variational_dropout = InputVariationalDropout(dropout) check_dimensions_match(context_field_embedder.get_output_dim(), context_encoder.get_input_dim(), 'Embedding', 'Encoder') if self.feedforward is not None: check_dimensions_match(context_encoder.get_output_dim(), feedforward.get_input_dim(), 'Encoder', 'FeedForward') initializer(self)
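# Hedged sketch (the model's forward pass is not shown above): given the encoded context
# and a mask over the target's token positions, the two pooling modes described in the
# docstring would plausibly look like this. `encoded_context`, `target_mask`, and
# `pool_target` are illustrative names, not attributes of the model.
import torch

def pool_target(encoded_context: torch.Tensor, target_mask: torch.Tensor, mode: str) -> torch.Tensor:
    # encoded_context: (batch, seq_len, dim); target_mask: (batch, seq_len) of 0/1.
    if mode == "mean":
        summed = (encoded_context * target_mask.unsqueeze(-1)).sum(dim=1)
        return summed / target_mask.sum(dim=1, keepdim=True).clamp(min=1)
    # "max": apply relu first so padded (zero) positions can tie but never beat a real
    # activation, then zero out non-target positions before taking the max.
    relu_context = torch.relu(encoded_context)
    relu_context = relu_context.masked_fill(~target_mask.bool().unsqueeze(-1), 0.0)
    return relu_context.max(dim=1).values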
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, question_encoder: Optional[Seq2SeqEncoder], choice_encoder: Optional[Seq2SeqEncoder], similarity_function: SimilarityFunction, projection_feedforward: FeedForward, inference_encoder: Seq2SeqEncoder, output_feedforward: FeedForward, output_logit: FeedForward, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, embeddings_dropout_value: Optional[float] = 0.0, encoder_dropout_value: Optional[float] = 0.0, ) -> None: super(QAMultiChoiceESIM, self).__init__(vocab) self._matrix_attention = LegacyMatrixAttention(similarity_function) self._projection_feedforward = projection_feedforward self._inference_encoder = inference_encoder self._output_feedforward = output_feedforward self._output_logit = output_logit check_dimensions_match(choice_encoder.get_output_dim(), question_encoder.get_output_dim(), "choice_encoder output dim", "question_encoder output dim") check_dimensions_match(text_field_embedder.get_output_dim(), question_encoder.get_input_dim(), "text field embedding dim", "encoder input dim") check_dimensions_match(question_encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(), "encoder output dim", "projection feedforward input") check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(), "proj feedforward output dim", "inference lstm input dim") self._use_cuda = (torch.cuda.is_available() and torch.cuda.current_device() >= 0) self._text_field_embedder = text_field_embedder if embeddings_dropout_value > 0.0: self._embeddings_dropout = torch.nn.Dropout( p=embeddings_dropout_value) else: self._embeddings_dropout = lambda x: x if encoder_dropout_value: self.dropout = torch.nn.Dropout(encoder_dropout_value) self.rnn_input_dropout = VariationalDropout(encoder_dropout_value) else: self.dropout = None self.rnn_input_dropout = None self._question_encoder = question_encoder # choices encoding self._choice_encoder = choice_encoder self._num_labels = vocab.get_vocab_size(namespace="labels") question_output_dim = self._text_field_embedder.get_output_dim() if self._question_encoder is not None: question_output_dim = self._question_encoder.get_output_dim() choice_output_dim = self._text_field_embedder.get_output_dim() if self._choice_encoder is not None: choice_output_dim = self._choice_encoder.get_output_dim() self._accuracy = CategoricalAccuracy() self._loss = torch.nn.CrossEntropyLoss() initializer(self)
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, num_highway_layers: int, phrase_layer: Seq2SeqEncoder, matrix_attention_layer: MatrixAttention, modeling_layer: Seq2SeqEncoder, dropout_prob: float = 0.1, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, answering_abilities: List[str] = None, ) -> None: super().__init__(vocab, regularizer) if answering_abilities is None: self.answering_abilities = [ "passage_span_extraction", "question_span_extraction", "addition_subtraction", "counting", ] else: self.answering_abilities = answering_abilities text_embed_dim = text_field_embedder.get_output_dim() encoding_in_dim = phrase_layer.get_input_dim() encoding_out_dim = phrase_layer.get_output_dim() modeling_in_dim = modeling_layer.get_input_dim() modeling_out_dim = modeling_layer.get_output_dim() self._text_field_embedder = text_field_embedder self._embedding_proj_layer = torch.nn.Linear(text_embed_dim, encoding_in_dim) self._highway_layer = Highway(encoding_in_dim, num_highway_layers) self._encoding_proj_layer = torch.nn.Linear(encoding_in_dim, encoding_in_dim) self._phrase_layer = phrase_layer self._matrix_attention = matrix_attention_layer self._modeling_proj_layer = torch.nn.Linear(encoding_out_dim * 4, modeling_in_dim) self._modeling_layer = modeling_layer self._passage_weights_predictor = torch.nn.Linear(modeling_out_dim, 1) self._question_weights_predictor = torch.nn.Linear(encoding_out_dim, 1) if len(self.answering_abilities) > 1: self._answer_ability_predictor = FeedForward( modeling_out_dim + encoding_out_dim, activations=[Activation.by_name("relu")(), Activation.by_name("linear")()], hidden_dims=[modeling_out_dim, len(self.answering_abilities)], num_layers=2, dropout=dropout_prob, ) if "passage_span_extraction" in self.answering_abilities: self._passage_span_extraction_index = self.answering_abilities.index( "passage_span_extraction" ) self._passage_span_start_predictor = FeedForward( modeling_out_dim * 2, activations=[Activation.by_name("relu")(), Activation.by_name("linear")()], hidden_dims=[modeling_out_dim, 1], num_layers=2, ) self._passage_span_end_predictor = FeedForward( modeling_out_dim * 2, activations=[Activation.by_name("relu")(), Activation.by_name("linear")()], hidden_dims=[modeling_out_dim, 1], num_layers=2, ) if "question_span_extraction" in self.answering_abilities: self._question_span_extraction_index = self.answering_abilities.index( "question_span_extraction" ) self._question_span_start_predictor = FeedForward( modeling_out_dim * 2, activations=[Activation.by_name("relu")(), Activation.by_name("linear")()], hidden_dims=[modeling_out_dim, 1], num_layers=2, ) self._question_span_end_predictor = FeedForward( modeling_out_dim * 2, activations=[Activation.by_name("relu")(), Activation.by_name("linear")()], hidden_dims=[modeling_out_dim, 1], num_layers=2, ) if "addition_subtraction" in self.answering_abilities: self._addition_subtraction_index = self.answering_abilities.index( "addition_subtraction" ) self._number_sign_predictor = FeedForward( modeling_out_dim * 3, activations=[Activation.by_name("relu")(), Activation.by_name("linear")()], hidden_dims=[modeling_out_dim, 3], num_layers=2, ) if "counting" in self.answering_abilities: self._counting_index = self.answering_abilities.index("counting") self._count_number_predictor = FeedForward( modeling_out_dim, activations=[Activation.by_name("relu")(), Activation.by_name("linear")()], hidden_dims=[modeling_out_dim, 10], num_layers=2, ) 
self._drop_metrics = DropEmAndF1() self._dropout = torch.nn.Dropout(p=dropout_prob) initializer(self)
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, tag_representation_dim: int, arc_representation_dim: int, tag_feedforward: FeedForward = None, arc_feedforward: FeedForward = None, pos_tag_embedding: Embedding = None, dropout: float = 0.0, input_dropout: float = 0.0, edge_prediction_threshold: float = 0.5, initializer: InitializerApplicator = InitializerApplicator(), **kwargs, ) -> None: super().__init__(vocab, **kwargs) self.text_field_embedder = text_field_embedder self.encoder = encoder self.edge_prediction_threshold = edge_prediction_threshold if not 0 < edge_prediction_threshold < 1: raise ConfigurationError( f"edge_prediction_threshold must be between " f"0 and 1 (exclusive) but found {edge_prediction_threshold}.") encoder_dim = encoder.get_output_dim() self.head_arc_feedforward = arc_feedforward or FeedForward( encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")()) self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward) self.arc_attention = BilinearMatrixAttention(arc_representation_dim, arc_representation_dim, use_input_biases=True) num_labels = self.vocab.get_vocab_size("labels") self.head_tag_feedforward = tag_feedforward or FeedForward( encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")()) self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward) self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim, tag_representation_dim, label_dim=num_labels) self._pos_tag_embedding = pos_tag_embedding or None self._dropout = InputVariationalDropout(dropout) self._input_dropout = Dropout(input_dropout) representation_dim = text_field_embedder.get_output_dim() if pos_tag_embedding is not None: representation_dim += pos_tag_embedding.get_output_dim() check_dimensions_match( representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim", ) check_dimensions_match( tag_representation_dim, self.head_tag_feedforward.get_output_dim(), "tag representation dim", "tag feedforward output dim", ) check_dimensions_match( arc_representation_dim, self.head_arc_feedforward.get_output_dim(), "arc representation dim", "arc feedforward output dim", ) self._unlabelled_f1 = F1Measure(positive_label=1) self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none") self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none") initializer(self)
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, calculate_span_f1: bool = None, label_encoding: Optional[str] = None, label_namespace: str = "labels", verbose_metrics: bool = False, initializer: InitializerApplicator = InitializerApplicator(), parameter_metrics: Dict[str, Metric] = {}, activation_metrics: Dict[str, Metric] = {}, infinity: float=1e3, **kwargs, ) -> None: super().__init__(vocab, **kwargs) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_classes = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self._verbose_metrics = verbose_metrics self.tag_projection_layer = TimeDistributed( Linear(self.encoder.get_output_dim(), self.num_classes) ) check_dimensions_match( text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim", ) self.metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3), } # We keep calculate_span_f1 as a constructor argument for API consistency with # the CrfTagger, even it is redundant in this class # (label_encoding serves the same purpose). if calculate_span_f1 is None: calculate_span_f1 = label_encoding is not None self.calculate_span_f1 = calculate_span_f1 if calculate_span_f1: if not label_encoding: raise ConfigurationError( "calculate_span_f1 is True, but no label_encoding was specified." ) self._f1_metric = SpanBasedF1Measure( vocab, tag_namespace=label_namespace, label_encoding=label_encoding ) else: self._f1_metric = None initializer(self) self.parameter_metrics = parameter_metrics self.activation_metrics = activation_metrics self.infinity = infinity
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder_word: Seq2SeqEncoder, attn_word: attention_module.BaseAttention, attn_sent: attention_module.BaseAttention, encoder_sent: Seq2SeqEncoder, thresh: float = 0.5, label_namespace: str = "labels", dropout: float = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, label_indexer: str = "LabelIndicesBiMap") -> None: super(HierAttnNetworkClassifier, self).__init__(vocab, regularizer) # Label Information self.label_namespace = label_namespace self.label_indexer = eval(label_indexer) # FIXME: Implement this self.num_labels = self.label_indexer.get_num_labels() # Prediction thresholds self.thresh = thresh self.log_thresh = np.log(thresh + 1e-5) # Model # Text encoders self.text_field_embedder = text_field_embedder # Sentence and doc encoders self.encoder_word = encoder_word self.encoder_sent = encoder_sent # Attention Modules self.key_dim = attn_sent.get_key_dim() self.attn_word = attn_word self.attn_sent = attn_sent if dropout: self.dropout = Dropout(dropout) else: self.dropout = None # Label prediction self.output_dim = self.attn_sent.get_output_dim() self.logits_layer = Linear(self.output_dim, self.num_labels) self.classification_metric = ClassificationMetrics( self.num_labels, label_indexer) initializer(self) # Some dimension checks check_dimensions_match(text_field_embedder.get_output_dim(), encoder_word.get_input_dim(), "text field embedding dim", "word encoder input dim") check_dimensions_match(encoder_word.get_output_dim(), attn_word.get_input_dim(), "word encoder output", "word attention input") check_dimensions_match(attn_word.get_output_dim(), encoder_sent.get_input_dim(), "word attention output", "sent encoder input") check_dimensions_match(encoder_sent.get_output_dim(), attn_sent.get_input_dim(), "sent encoder output", "sent attn input")
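# Shape-flow sketch implied by the dimension checks above (an assumed reading of the
# hierarchical-attention pipeline, not a verified forward pass):
#
#   tokens  -> text_field_embedder -> (batch * n_sents, n_words, embed_dim)
#           -> encoder_word        -> (batch * n_sents, n_words, word_enc_dim)
#           -> attn_word           -> sentence vectors   (batch, n_sents, attn_word_out_dim)
#           -> encoder_sent        -> (batch, n_sents, sent_enc_dim)
#           -> attn_sent           -> document vector    (batch, attn_sent_out_dim)
#           -> logits_layer        -> (batch, num_labels)
#
# i.e. word-level attention pools each sentence, sentence-level attention pools the
# document, and the pooled document vector feeds the label logits.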
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, text_encoder: Seq2SeqEncoder, target_encoder: Seq2VecEncoder, feedforward: Optional[FeedForward] = None, target_field_embedder: Optional[TextFieldEmbedder] = None, attention_activation_function: Optional[str] = 'tanh', initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, word_dropout: float = 0.0, dropout: float = 0.0) -> None: ''' :param vocab: A Vocabulary, required in order to compute sizes for input/output projections. :param text_field_embedder: Used to embed the text and target text if target_field_embedder is None but the target_encoder is not None. :param text_encoder: Sequence Encoder that will create the representation of each token in the context sentence. :param target_encoder: Encoder that will create the representation of the target text tokens. :param feedforward: An optional feed forward layer to apply after the text encoder if the target encoder is None, else after the target and text encoded representations have been concatenated. :param target_field_embedder: Used to embed the target text to give as input to the target_encoder. This allows a separate embedding for the text and the target text. :param attention_activation_function: The name of the activation function applied after the ``h^T W t + b`` calculation. Activation names can be found `here <https://allenai.github.io/allennlp-docs/api/allennlp.nn.activations.html>`_. Default is tanh. :param initializer: Used to initialize the model parameters. :param regularizer: If provided, will be used to calculate the regularization penalty during training. :param word_dropout: Dropout that is applied after the embedding of the tokens/words. It will drop entire words with this probability. :param dropout: To apply dropout after each layer apart from the last layer. All dropout that is applied to time-based data will be `variational dropout`_; all else will be standard dropout. This attention target classifier is based on the model in `Exploiting Document Knowledge for Aspect-level Sentiment Classification <https://aclanthology.info/papers/P18-2092/p18-2092>`_ by Ruidan He et al., where the attention over the encoded context words is based on the encoded target vector. .. 
_variational dropout: https://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks.pdf ''' super().__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.target_field_embedder = target_field_embedder self.num_classes = self.vocab.get_vocab_size("labels") self.text_encoder = text_encoder self.target_encoder = target_encoder self.feedforward = feedforward attention_activation_function = Activation.by_name( f'{attention_activation_function}')() self.attention_layer = BilinearAttention( self.target_encoder.get_output_dim(), self.text_encoder.get_output_dim(), attention_activation_function, normalize=True) if feedforward is not None: output_dim = self.feedforward.get_output_dim() else: output_dim = self.text_encoder.get_output_dim() self.label_projection = Linear(output_dim, self.num_classes) self.metrics = {"accuracy": CategoricalAccuracy()} self.f1_metrics = {} # F1 Scores label_index_name = self.vocab.get_index_to_token_vocabulary('labels') for label_index, label_name in label_index_name.items(): label_name = f'F1_{label_name.capitalize()}' self.f1_metrics[label_name] = F1Measure(label_index) self._word_dropout = WordDrouput(word_dropout) self._variational_dropout = InputVariationalDropout(dropout) self._naive_dropout = Dropout(dropout) self.loss = torch.nn.CrossEntropyLoss() # Ensure that the dimensions of the text field embedder and text encoder # match check_dimensions_match(text_field_embedder.get_output_dim(), text_encoder.get_input_dim(), "text field embedding dim", "text encoder input dim") # Ensure that the dimensions of the target or text field embedder and # the target encoder match target_field_embedder_dim = text_field_embedder.get_output_dim() target_field_error = "text field embedding dim" if self.target_field_embedder: target_field_embedder_dim = target_field_embedder.get_output_dim() target_field_error = "target field embedding dim" check_dimensions_match(target_field_embedder_dim, target_encoder.get_input_dim(), target_field_error, "target encoder input dim") initializer(self)
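# Hedged sketch of the attention step the docstring above describes (``h^T W t + b``).
# Variable names and the explicit weighted sum are illustrative assumptions; only the
# layers themselves come from the model.
#
#   target_vec = self.target_encoder(embedded_target, target_mask)        # (batch, target_dim)
#   encoded_text = self.text_encoder(embedded_text, text_mask)            # (batch, seq, text_dim)
#   attn = self.attention_layer(target_vec, encoded_text, text_mask)      # (batch, seq), normalised
#   weighted_text = (encoded_text * attn.unsqueeze(-1)).sum(dim=1)        # (batch, text_dim)
#   logits = self.label_projection(weighted_text)                         # (batch, num_classes)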
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, stacked_encoder: Seq2SeqEncoder, span_feedforward: FeedForward, binary_feature_dim: int, max_span_width: int, binary_feature_size: int, distance_feature_size: int, ontology_path: str, embedding_dropout: float = 0.2, srl_label_namespace: str = "labels", constit_label_namespace: str = "constit_labels", fast_mode: bool = True, loss_type: str = "hamming", unlabeled_constits: bool = False, np_pp_constits: bool = False, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(ScaffoldedFrameSrl, self).__init__(vocab, regularizer) # Base token-level encoding. self.text_field_embedder = text_field_embedder self.embedding_dropout = Dropout(p=embedding_dropout) # There are exactly 2 binary features for the verb predicate embedding. self.binary_feature_embedding = Embedding(2, binary_feature_dim) self.stacked_encoder = stacked_encoder if text_field_embedder.get_output_dim( ) + binary_feature_dim != stacked_encoder.get_input_dim(): raise ConfigurationError( "The input dimension of the stacked_encoder must be equal to " "the output dimension of the text_field_embedder.") # Span-level encoding. self.max_span_width = max_span_width self.span_width_embedding = Embedding(max_span_width, binary_feature_size) # Based on the average sentence length in FN train. self.span_distance_bin = 25 self.span_distance_embedding = Embedding(self.span_distance_bin, distance_feature_size) self.span_direction_embedding = Embedding(2, binary_feature_size) self.span_feedforward = TimeDistributed(span_feedforward) self.head_scorer = TimeDistributed( torch.nn.Linear(stacked_encoder.get_output_dim(), 1)) self.num_srl_args = self.vocab.get_vocab_size(srl_label_namespace) self.not_a_span_tag = self.vocab.get_token_index( "*", srl_label_namespace) self.outside_span_tag = self.vocab.get_token_index( "O", srl_label_namespace) self.semi_crf = SemiMarkovConditionalRandomField( num_tags=self.num_srl_args, max_span_width=max_span_width, default_tag=self.not_a_span_tag, outside_span_tag=self.outside_span_tag, loss_type=loss_type) # self.crf = ConditionalRandomField(self.num_classes) self.unlabeled_constits = unlabeled_constits self.np_pp_constits = np_pp_constits self.constit_label_namespace = constit_label_namespace assert not (unlabeled_constits and np_pp_constits) if unlabeled_constits: self.num_constit_tags = 2 elif np_pp_constits: self.num_constit_tags = 3 else: self.num_constit_tags = self.vocab.get_vocab_size( constit_label_namespace) # Topmost MLP. self.srl_arg_projection_layer = TimeDistributed( Linear(span_feedforward.get_output_dim(), self.num_srl_args)) self.constit_arg_projection_layer = TimeDistributed( Linear(span_feedforward.get_output_dim(), self.num_constit_tags)) # Evaluation. self.metrics = { "constituents": NonBioSpanBasedF1Measure(vocab, tag_namespace=constit_label_namespace, ignore_classes=["*"]), "srl": NonBioSpanBasedF1Measure(vocab, tag_namespace=srl_label_namespace, ignore_classes=["O", "*"], ontology_path=ontology_path) } # Mode for the model, if turned on it only evaluates on dev and calculates loss for train. self.fast_mode = fast_mode initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, stacked_encoder: Seq2SeqEncoder, span_feedforward: FeedForward, binary_feature_dim: int, max_span_width: int, binary_feature_size: int, distance_feature_size: int, ontology_path: str, embedding_dropout: float = 0.2, label_namespace: str = "labels", fast_mode: bool = True, loss_type: str = "logloss", initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(FrameSemanticRoleLabeler, self).__init__(vocab, regularizer) # Base token-level encoding. self.text_field_embedder = text_field_embedder self.embedding_dropout = Dropout(p=embedding_dropout) # There are exactly 2 binary features for the verb predicate embedding. self.binary_feature_embedding = Embedding(2, binary_feature_dim) self.stacked_encoder = stacked_encoder if text_field_embedder.get_output_dim( ) + binary_feature_dim != stacked_encoder.get_input_dim(): raise ConfigurationError( "The SRL Model uses a binary verb indicator feature, meaning " "the input dimension of the stacked_encoder must be equal to " "the output dimension of the text_field_embedder + 1.") # Span-level encoding. self.max_span_width = max_span_width self.span_width_embedding = Embedding(max_span_width, binary_feature_size) # Based on the average sentence length in FN train. self.span_distance_bin = 25 self.span_distance_embedding = Embedding(self.span_distance_bin, distance_feature_size) self.span_direction_embedding = Embedding(2, binary_feature_size) self.span_feedforward = TimeDistributed(span_feedforward) self.head_scorer = TimeDistributed( torch.nn.Linear(stacked_encoder.get_output_dim(), 1)) self.num_classes = self.vocab.get_vocab_size(label_namespace) self.not_a_span_tag = self.vocab.get_token_index("*", label_namespace) self.outside_span_tag = self.vocab.get_token_index( "O", label_namespace) self.semi_crf = SemiMarkovConditionalRandomField( num_tags=self.num_classes, max_span_width=max_span_width, default_tag=self.not_a_span_tag, outside_span_tag=self.outside_span_tag, loss_type=loss_type) # self.crf = ConditionalRandomField(self.num_classes) # Topmost MLP. self.tag_projection_layer = TimeDistributed( Linear(span_feedforward.get_output_dim(), self.num_classes)) # Evaluation. # For the span-based evaluation, we don't want to consider labels # for the outside span or for the dummy span, because FrameNet eval does not either. self.non_bio_span_metric = NonBioSpanBasedF1Measure( vocab, tag_namespace=label_namespace, ignore_classes=["O", "*"], ontology_path=ontology_path) # Mode for the model, if turned on it only evaluates on dev and calculates loss for train. self.fast_mode = fast_mode initializer(self)
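# Loose sketch of how the span features created above are typically combined for the
# semi-Markov CRF (names and ordering are assumptions; the forward pass is not shown
# here): each candidate span (i, j) around a predicate would get a representation built
# from
#   - an attention-weighted sum of token encodings inside the span, using
#     self.head_scorer to score each token as the span's head,
#   - self.span_width_embedding for the span width (j - i),
#   - self.span_distance_embedding for the bucketed distance to the predicate
#     (capped at self.span_distance_bin), and
#   - self.span_direction_embedding for whether the span precedes or follows the predicate,
# with the concatenation fed through self.span_feedforward and then the
# tag_projection_layer to produce per-span label scores for the semi-CRF.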
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, title_text_projection: FeedForward, abstract_text_projection: FeedForward, title_text_encoder: Seq2SeqEncoder, abstract_text_encoder: Seq2SeqEncoder, bi_attention_encoder: BiAttentionEncoder, classifier_feedforward: Union[FeedForward, Maxout], bce_pos_weight: int = 10, use_positional_encoding: bool = False, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(EtdBCN, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.num_classes = self.vocab.get_vocab_size("labels") self.title_text_projection = title_text_projection self.abstract_text_projection = abstract_text_projection self.title_text_encoder = title_text_encoder self.abstract_text_encoder = abstract_text_encoder self.bi_attention_encoder = bi_attention_encoder self.classifier_feedforward = classifier_feedforward self.use_positional_encoding = use_positional_encoding if text_field_embedder.get_output_dim( ) != title_text_projection.get_input_dim(): raise ConfigurationError( "The output dimension of the text_field_embedder must match the " "input dimension of the title_text_projection. Found {} and {}, " "respectively.".format(text_field_embedder.get_output_dim(), title_text_projection.get_input_dim())) if text_field_embedder.get_output_dim( ) != abstract_text_projection.get_input_dim(): raise ConfigurationError( "The output dimension of the text_field_embedder must match the " "input dimension of the abstract_text_projection. Found {} and {}, " "respectively.".format( text_field_embedder.get_output_dim(), abstract_text_projection.get_input_dim())) if title_text_projection.get_output_dim( ) != title_text_encoder.get_input_dim(): raise ConfigurationError( "The output dimension of the title_text_projection must match the " "input dimension of the title_text_encoder. Found {} and {}, " "respectively.".format(title_text_projection.get_output_dim(), title_text_encoder.get_input_dim())) if abstract_text_projection.get_output_dim( ) != abstract_text_encoder.get_input_dim(): raise ConfigurationError( "The output dimension of the abstract_text_projection must match the " "input dimension of the abstract_text_encoder. Found {} and {}, " "respectively.".format( abstract_text_projection.get_output_dim(), abstract_text_encoder.get_input_dim())) self.metrics = { # "roc_auc_score": RocAucScore() "hit_5": HitAtK(5), "hit_10": HitAtK(10), # "hit_100": HitAtK(100), # "marco_f1": MacroF1Measure(top_k=5,num_label=self.num_classes) } self.loss = torch.nn.BCEWithLogitsLoss( pos_weight=torch.ones(self.num_classes) * bce_pos_weight) initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, label_namespace: str = "labels", feedforward: Optional[FeedForward] = None, label_encoding: Optional[str] = None, constraint_type: Optional[str] = None, include_start_end_transitions: bool = True, constrain_crf_decoding: bool = None, calculate_span_f1: bool = None, dropout: Optional[float] = None, verbose_metrics: bool = False, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self._verbose_metrics = verbose_metrics if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self._feedforward = feedforward if feedforward is not None: output_dim = feedforward.get_output_dim() else: output_dim = self.encoder.get_output_dim() self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags)) if constraint_type is not None: warnings.warn("'constraint_type' was removed and replaced with" "'label_encoding', 'constrain_crf_decoding', and " "'calculate_span_f1' in version 0.6.1. It will be " "removed in version 0.8.", DeprecationWarning) label_encoding = constraint_type # if constrain_crf_decoding and calculate_span_f1 are not # provided, (i.e., they're None), set them to True # if label_encoding is provided and False if it isn't. if constrain_crf_decoding is None: constrain_crf_decoding = label_encoding is not None if calculate_span_f1 is None: calculate_span_f1 = label_encoding is not None self.label_encoding = label_encoding if constrain_crf_decoding: if not label_encoding: raise ConfigurationError("constrain_crf_decoding is True, but " "no label_encoding was specified.") labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(label_encoding, labels) else: constraints = None self.include_start_end_transitions = include_start_end_transitions self.crf = ConditionalRandomField( self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions ) self.metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3) } self.calculate_span_f1 = calculate_span_f1 if calculate_span_f1: if not label_encoding: raise ConfigurationError("calculate_span_f1 is True, but " "no label_encoding was specified.") self._f1_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace, label_encoding=label_encoding) elif constraint_type is not None: # Maintain deprecated behavior if constraint_type is provided self._f1_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace, label_encoding=constraint_type) check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") if feedforward is not None: check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(), "encoder output dim", "feedforward input dim") initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, tag_representation_dim: int, arc_representation_dim: int, tag_feedforward: FeedForward = None, arc_feedforward: FeedForward = None, pos_tag_embedding: Embedding = None, dropout: float = 0.0, input_dropout: float = 0.0, edge_prediction_threshold: float = 0.5, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(EnhancedParser, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.encoder = encoder self.edge_prediction_threshold = edge_prediction_threshold if not 0 < edge_prediction_threshold < 1: raise ConfigurationError( f"edge_prediction_threshold must be between " f"0 and 1 (exclusive) but found {edge_prediction_threshold}.") encoder_dim = encoder.get_output_dim() self.head_arc_feedforward = arc_feedforward or \ FeedForward(encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")()) self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward) self.arc_attention = BilinearMatrixAttention(arc_representation_dim, arc_representation_dim, use_input_biases=True) num_labels = self.vocab.get_vocab_size("labels") self.head_tag_feedforward = tag_feedforward or \ FeedForward(encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")()) self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward) self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim, tag_representation_dim, label_dim=num_labels) self._pos_tag_embedding = pos_tag_embedding or None self._dropout = InputVariationalDropout(dropout) self._input_dropout = Dropout(input_dropout) # add a head sentinel to accommodate for extra root token self._head_sentinel = torch.nn.Parameter( torch.randn([1, 1, encoder.get_output_dim()])) representation_dim = text_field_embedder.get_output_dim() if pos_tag_embedding is not None: representation_dim += pos_tag_embedding.get_output_dim() check_dimensions_match(representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim") check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(), "tag representation dim", "tag feedforward output dim") check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(), "arc representation dim", "arc feedforward output dim") # the unlabelled_f1 is confirmed the same from both classes self._unlabelled_f1 = F1Measure(positive_label=1) self._enhanced_attachment_scores = EnhancedAttachmentScores() self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction='none') self._tag_loss = torch.nn.CrossEntropyLoss(reduction='none') initializer(self)