def __init__(self, vocab: Vocabulary, question_embedder: TextFieldEmbedder, action_embedding_dim: int, encoder: Seq2SeqEncoder, entity_encoder: Seq2VecEncoder, max_decoding_steps: int, use_neighbor_similarity_for_linking: bool = False, dropout: float = 0.0, num_linking_features: int = 10, rule_namespace: str = 'rule_labels', tables_directory: str = '/wikitables/') -> None: super(WikiTablesSemanticParser, self).__init__(vocab) self._question_embedder = question_embedder self._encoder = encoder self._entity_encoder = TimeDistributed(entity_encoder) self._max_decoding_steps = max_decoding_steps self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._rule_namespace = rule_namespace self._denotation_accuracy = WikiTablesAccuracy(tables_directory) self._action_sequence_accuracy = Average() self._has_logical_form = Average() self._action_padding_index = -1 # the padding value used by IndexField num_actions = vocab.get_vocab_size(self._rule_namespace) self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) self._action_biases = Embedding(num_embeddings=num_actions, embedding_dim=1) # This is what we pass as input in the first step of decoding, when we don't have a # previous action, or a previous question attention. self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim)) self._first_attended_question = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim())) torch.nn.init.normal_(self._first_action_embedding) torch.nn.init.normal_(self._first_attended_question) check_dimensions_match(entity_encoder.get_output_dim(), question_embedder.get_output_dim(), "entity word average embedding dim", "question embedding dim") self._num_entity_types = 4 # TODO(mattg): get this in a more principled way somehow? self._num_start_types = 5 # TODO(mattg): get this in a more principled way somehow? self._embedding_dim = question_embedder.get_output_dim() self._type_params = torch.nn.Linear(self._num_entity_types, self._embedding_dim) self._neighbor_params = torch.nn.Linear(self._embedding_dim, self._embedding_dim) if num_linking_features > 0: self._linking_params = torch.nn.Linear(num_linking_features, 1) else: self._linking_params = None if self._use_neighbor_similarity_for_linking: self._question_entity_params = torch.nn.Linear(1, 1) self._question_neighbor_params = torch.nn.Linear(1, 1) else: self._question_entity_params = None self._question_neighbor_params = None
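# All of these constructors lean on AllenNLP's dimension-checking helper. For
# reference, a minimal sketch of what allennlp.common.checks.check_dimensions_match
# does (the real helper behaves like this; exact message wording may differ by version):
from allennlp.common.checks import ConfigurationError

def check_dimensions_match(dimension_1: int, dimension_2: int, dim_1_name: str, dim_2_name: str) -> None:
    # Fail fast at construction time, naming both mismatched dimensions.
    if dimension_1 != dimension_2:
        raise ConfigurationError(f"{dim_1_name} must match {dim_2_name}, but got "
                                 f"{dimension_1} and {dimension_2} instead")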
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, num_highway_layers: int, phrase_layer: Seq2SeqEncoder, similarity_function: SimilarityFunction, modeling_layer: Seq2SeqEncoder, span_end_encoder: Seq2SeqEncoder, dropout: float = 0.2, mask_lstms: bool = True, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(), num_highway_layers)) self._phrase_layer = phrase_layer self._matrix_attention = LegacyMatrixAttention(similarity_function) self._modeling_layer = modeling_layer self._span_end_encoder = span_end_encoder encoding_dim = phrase_layer.get_output_dim() modeling_dim = modeling_layer.get_output_dim() span_start_input_dim = encoding_dim * 4 + modeling_dim self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1)) span_end_encoding_dim = span_end_encoder.get_output_dim() span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1)) # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily # obvious from the configuration files, so we check here. check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim, "modeling layer input dim", "4 * encoding dim") check_dimensions_match(text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(), "text field embedder output dim", "phrase layer input dim") check_dimensions_match(span_end_encoder.get_input_dim(), 4 * encoding_dim + 3 * modeling_dim, "span end encoder input dim", "4 * encoding dim + 3 * modeling dim") self._span_start_accuracy = CategoricalAccuracy() self._span_end_accuracy = CategoricalAccuracy() self._span_accuracy = BooleanAccuracy() self._squad_metrics = SquadEmAndF1() if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._mask_lstms = mask_lstms initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, attend_feedforward: FeedForward, similarity_function: SimilarityFunction, compare_feedforward: FeedForward, aggregate_feedforward: FeedForward, premise_encoder: Optional[Seq2SeqEncoder] = None, hypothesis_encoder: Optional[Seq2SeqEncoder] = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(DecomposableAttention, self).__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._attend_feedforward = TimeDistributed(attend_feedforward) self._matrix_attention = LegacyMatrixAttention(similarity_function) self._compare_feedforward = TimeDistributed(compare_feedforward) self._aggregate_feedforward = aggregate_feedforward self._premise_encoder = premise_encoder self._hypothesis_encoder = hypothesis_encoder or premise_encoder self._num_labels = vocab.get_vocab_size(namespace="labels") check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(), "text field embedding dim", "attend feedforward input dim") check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels, "final output dimension", "number of labels") self._accuracy = CategoricalAccuracy() self._loss = torch.nn.CrossEntropyLoss() initializer(self)
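# TimeDistributed, used throughout these constructors, folds the time dimension
# into the batch dimension, applies the wrapped module, and unfolds the result.
# A minimal sketch under that assumption (the real allennlp.modules.TimeDistributed
# also handles multiple tensor inputs and keyword arguments):
import torch

class TimeDistributedSketch(torch.nn.Module):
    def __init__(self, module: torch.nn.Module) -> None:
        super().__init__()
        self._module = module

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        batch_size, time_steps = inputs.size(0), inputs.size(1)
        # (batch, time, ...) -> (batch * time, ...)
        squashed = inputs.reshape(batch_size * time_steps, *inputs.shape[2:])
        outputs = self._module(squashed)
        # (batch * time, ...) -> (batch, time, ...)
        return outputs.reshape(batch_size, time_steps, *outputs.shape[1:])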
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, binary_feature_dim: int, embedding_dropout: float = 0.0, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, label_smoothing: float = None, ignore_span_metric: bool = False) -> None: super(SemanticRoleLabeler, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.num_classes = self.vocab.get_vocab_size("labels") # For the span based evaluation, we don't want to consider labels # for verb, because the verb index is provided to the model. self.span_metric = SpanBasedF1Measure(vocab, tag_namespace="labels", ignore_classes=["V"]) self.encoder = encoder # There are exactly 2 binary features for the verb predicate embedding. self.binary_feature_embedding = Embedding(2, binary_feature_dim) self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(), self.num_classes)) self.embedding_dropout = Dropout(p=embedding_dropout) self._label_smoothing = label_smoothing self.ignore_span_metric = ignore_span_metric check_dimensions_match(text_field_embedder.get_output_dim() + binary_feature_dim, encoder.get_input_dim(), "text embedding dim + verb indicator embedding dim", "encoder input dim") initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, label_namespace: str = "labels", constraint_type: str = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(), self.num_tags)) if constraint_type is not None: labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(constraint_type, labels) else: constraints = None self.crf = ConditionalRandomField(self.num_tags, constraints) self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace) check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") initializer(self)
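# Illustrative use of the allowed_transitions call above: it consumes the
# index-to-label vocabulary and returns the (from_tag, to_tag) index pairs the
# CRF may traverse. The tiny label vocabulary below is made up for the example.
from allennlp.modules.conditional_random_field import allowed_transitions

labels = {0: "O", 1: "B-PER", 2: "I-PER"}
constraints = allowed_transitions("BIO", labels)
# Under BIO, pairs such as "O" -> "I-PER" are absent from the result, so the
# CRF can never decode an I- tag that does not continue a matching entity.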
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, label_namespace: str = "labels", constraint_type: str = None, feedforward: FeedForward = None, include_start_end_transitions: bool = True, dropout: float = None, verbose_metrics: bool = False, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self._verbose_metrics = verbose_metrics if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self._feedforward = feedforward if feedforward is not None: output_dim = feedforward.get_output_dim() else: output_dim = self.encoder.get_output_dim() self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags)) if constraint_type is not None: labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(constraint_type, labels) else: constraints = None self.crf = ConditionalRandomField( self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions ) self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace, label_encoding=constraint_type or "BIO") check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") if feedforward is not None: check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(), "encoder output dim", "feedforward input dim") initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, tag_representation_dim: int, arc_representation_dim: int, pos_tag_embedding: Embedding = None, use_mst_decoding_for_validation: bool = True, dropout: float = 0.0, input_dropout: float = 0.0, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(BiaffineDependencyParser, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.encoder = encoder encoder_dim = encoder.get_output_dim() self.head_arc_projection = torch.nn.Linear(encoder_dim, arc_representation_dim) self.child_arc_projection = torch.nn.Linear(encoder_dim, arc_representation_dim) self.arc_attention = BilinearMatrixAttention(arc_representation_dim, arc_representation_dim, use_input_biases=True) num_labels = self.vocab.get_vocab_size("head_tags") self.head_tag_projection = torch.nn.Linear(encoder_dim, tag_representation_dim) self.child_tag_projection = torch.nn.Linear(encoder_dim, tag_representation_dim) self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim, tag_representation_dim, num_labels) self._pos_tag_embedding = pos_tag_embedding or None self._dropout = InputVariationalDropout(dropout) self._input_dropout = Dropout(input_dropout) representation_dim = text_field_embedder.get_output_dim() if pos_tag_embedding is not None: representation_dim += pos_tag_embedding.get_output_dim() check_dimensions_match(representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim") self.use_mst_decoding_for_validation = use_mst_decoding_for_validation tags = self.vocab.get_token_to_index_vocabulary("pos") punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE} self._pos_to_ignore = set(punctuation_tag_indices.values()) logger.info(f"Found POS tags corresponding to the following punctuation: {punctuation_tag_indices}. " "Ignoring words with these POS tags for evaluation.") self._attachment_scores = AttachmentScores() initializer(self)
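# A hedged sketch of what the arc scorer above computes. With
# use_input_biases=True, BilinearMatrixAttention appends a constant 1 to each
# projected vector, so the bilinear form also learns per-side bias terms. The
# function name and the explicit weight argument here are illustrative, not
# the library API:
import torch

def biaffine_arc_scores(head: torch.Tensor, child: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
    # head, child: (batch, seq_len, arc_dim); weight: (arc_dim + 1, arc_dim + 1)
    ones = head.new_ones(head.size(0), head.size(1), 1)
    head = torch.cat([head, ones], dim=-1)
    child = torch.cat([child, ones], dim=-1)
    # (batch, seq_len, seq_len): a score for each candidate head-child arc.
    return head @ weight @ child.transpose(1, 2)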
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, span_extractor: SpanExtractor, encoder: Seq2SeqEncoder, feedforward_layer: FeedForward = None, pos_tag_embedding: Embedding = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, evalb_directory_path: str = None) -> None: super(SpanConstituencyParser, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.span_extractor = span_extractor self.num_classes = self.vocab.get_vocab_size("labels") self.encoder = encoder self.feedforward_layer = TimeDistributed(feedforward_layer) if feedforward_layer else None self.pos_tag_embedding = pos_tag_embedding or None if feedforward_layer is not None: output_dim = feedforward_layer.get_output_dim() else: output_dim = span_extractor.get_output_dim() self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_classes)) representation_dim = text_field_embedder.get_output_dim() if pos_tag_embedding is not None: representation_dim += pos_tag_embedding.get_output_dim() check_dimensions_match(representation_dim, encoder.get_input_dim(), "representation dim (tokens + optional POS tags)", "encoder input dim") check_dimensions_match(encoder.get_output_dim(), span_extractor.get_input_dim(), "encoder output dim", "span extractor input dim") if feedforward_layer is not None: check_dimensions_match(span_extractor.get_output_dim(), feedforward_layer.get_input_dim(), "span extractor output dim", "feedforward input dim") self.tag_accuracy = CategoricalAccuracy() if evalb_directory_path is not None: self._evalb_score = EvalbBracketingScorer(evalb_directory_path) else: self._evalb_score = None initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, context_layer: Seq2SeqEncoder, mention_feedforward: FeedForward, antecedent_feedforward: FeedForward, feature_size: int, max_span_width: int, spans_per_word: float, max_antecedents: int, lexical_dropout: float = 0.2, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(CoreferenceResolver, self).__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._context_layer = context_layer self._antecedent_feedforward = TimeDistributed(antecedent_feedforward) feedforward_scorer = torch.nn.Sequential( TimeDistributed(mention_feedforward), TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1))) self._mention_pruner = SpanPruner(feedforward_scorer) self._antecedent_scorer = TimeDistributed(torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1)) self._endpoint_span_extractor = EndpointSpanExtractor(context_layer.get_output_dim(), combination="x,y", num_width_embeddings=max_span_width, span_width_embedding_dim=feature_size, bucket_widths=False) self._attentive_span_extractor = SelfAttentiveSpanExtractor(input_dim=text_field_embedder.get_output_dim()) # 10 possible distance buckets. self._num_distance_buckets = 10 self._distance_embedding = Embedding(self._num_distance_buckets, feature_size) self._max_span_width = max_span_width self._spans_per_word = spans_per_word self._max_antecedents = max_antecedents self._mention_recall = MentionRecall() self._conll_coref_scores = ConllCorefScores() if lexical_dropout > 0: self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout) else: self._lexical_dropout = lambda x: x initializer(self)
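# The "Dropout if p > 0 else lambda x: x" idiom above works, but a bare lambda
# is not an nn.Module (it is invisible in the module tree and does not pickle).
# A sketch of an equivalent alternative using torch.nn.Identity, offered as a
# design note rather than a change to the model:
import torch

def make_dropout(p: float) -> torch.nn.Module:
    # Same behavior as the idiom in these constructors, but always an nn.Module.
    return torch.nn.Dropout(p=p) if p > 0 else torch.nn.Identity()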
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, similarity_function: SimilarityFunction, projection_feedforward: FeedForward, inference_encoder: Seq2SeqEncoder, output_feedforward: FeedForward, output_logit: FeedForward, dropout: float = 0.5, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._encoder = encoder self._matrix_attention = LegacyMatrixAttention(similarity_function) self._projection_feedforward = projection_feedforward self._inference_encoder = inference_encoder if dropout: self.dropout = torch.nn.Dropout(dropout) self.rnn_input_dropout = InputVariationalDropout(dropout) else: self.dropout = None self.rnn_input_dropout = None self._output_feedforward = output_feedforward self._output_logit = output_logit self._num_labels = vocab.get_vocab_size(namespace="labels") check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(), "encoder output dim", "projection feedforward input") check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(), "proj feedforward output dim", "inference lstm input dim") self._accuracy = CategoricalAccuracy() self._loss = torch.nn.CrossEntropyLoss() initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(SimpleTagger, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.num_classes = self.vocab.get_vocab_size("labels") self.encoder = encoder self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(), self.num_classes)) check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") self.metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3) } initializer(self)
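# For orientation, a schematic of how SimpleTagger's pieces fit together at
# call time. This is a sketch, not the library's forward(); tokens and mask
# stand in for the usual AllenNLP text-field tensors:
def tag_logits(self, tokens, mask):
    embedded = self.text_field_embedder(tokens)   # (batch, seq_len, embedding_dim)
    encoded = self.encoder(embedded, mask)        # (batch, seq_len, encoder_dim)
    return self.tag_projection_layer(encoded)     # (batch, seq_len, num_classes)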
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, embedding_dropout: float, pre_encode_feedforward: FeedForward, encoder: Seq2SeqEncoder, integrator: Seq2SeqEncoder, integrator_dropout: float, output_layer: Union[FeedForward, Maxout], elmo: Elmo, use_input_elmo: bool = False, use_integrator_output_elmo: bool = False, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(BiattentiveClassificationNetwork, self).__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder if "elmo" in self._text_field_embedder._token_embedders.keys(): # pylint: disable=protected-access raise ConfigurationError("To use ELMo in the BiattentiveClassificationNetwork input, " "remove elmo from the text_field_embedder and pass an " "Elmo object to the BiattentiveClassificationNetwork and set the " "'use_input_elmo' and 'use_integrator_output_elmo' flags accordingly.") self._embedding_dropout = nn.Dropout(embedding_dropout) self._num_classes = self.vocab.get_vocab_size("labels") self._pre_encode_feedforward = pre_encode_feedforward self._encoder = encoder self._integrator = integrator self._integrator_dropout = nn.Dropout(integrator_dropout) self._elmo = elmo self._use_input_elmo = use_input_elmo self._use_integrator_output_elmo = use_integrator_output_elmo self._num_elmo_layers = int(self._use_input_elmo) + int(self._use_integrator_output_elmo) # Check that, if elmo is None, none of the elmo flags are set. if self._elmo is None and self._num_elmo_layers != 0: raise ConfigurationError("One of 'use_input_elmo' or 'use_integrator_output_elmo' is True, " "but no Elmo object was provided upon construction. Pass in an Elmo " "object to use Elmo.") if self._elmo is not None: # Check that, if elmo is not None, we use it somewhere. if self._num_elmo_layers == 0: raise ConfigurationError("Elmo object provided upon construction, but both 'use_input_elmo' " "and 'use_integrator_output_elmo' are 'False'. Set one of them to " "'True' to use Elmo, or do not provide an Elmo object upon construction.") # Check that the number of flags set is equal to the num_output_representations of the Elmo object # pylint: disable=protected-access if len(self._elmo._scalar_mixes) != self._num_elmo_layers: raise ConfigurationError("Elmo object has num_output_representations={}, but this does not " "match the number of use_*_elmo flags set to true. use_input_elmo " "is {}, and use_integrator_output_elmo is {}".format(len(self._elmo._scalar_mixes), self._use_input_elmo, self._use_integrator_output_elmo)) # Calculate combined integrator output dim, taking into account elmo if self._use_integrator_output_elmo: self._combined_integrator_output_dim = (self._integrator.get_output_dim() + self._elmo.get_output_dim()) else: self._combined_integrator_output_dim = self._integrator.get_output_dim() self._self_attentive_pooling_projection = nn.Linear(self._combined_integrator_output_dim, 1) self._output_layer = output_layer if self._use_input_elmo: check_dimensions_match(text_field_embedder.get_output_dim() + self._elmo.get_output_dim(), self._pre_encode_feedforward.get_input_dim(), "text field embedder output dim + ELMo output dim", "Pre-encoder feedforward input dim") else: check_dimensions_match(text_field_embedder.get_output_dim(), self._pre_encode_feedforward.get_input_dim(), "text field embedder output dim", "Pre-encoder feedforward input dim") check_dimensions_match(self._pre_encode_feedforward.get_output_dim(), self._encoder.get_input_dim(), "Pre-encoder feedforward output dim", "Encoder input dim") check_dimensions_match(self._encoder.get_output_dim() * 3, self._integrator.get_input_dim(), "Encoder output dim * 3", "Integrator input dim") if self._use_integrator_output_elmo: check_dimensions_match(self._combined_integrator_output_dim * 4, self._output_layer.get_input_dim(), "(Integrator output dim + ELMo output dim) * 4", "Output layer input dim") else: check_dimensions_match(self._integrator.get_output_dim() * 4, self._output_layer.get_input_dim(), "Integrator output dim * 4", "Output layer input dim") check_dimensions_match(self._output_layer.get_output_dim(), self._num_classes, "Output layer output dim", "Number of classes.") self.metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3) } self.loss = torch.nn.CrossEntropyLoss() initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, phrase_layer: Seq2SeqEncoder, residual_encoder: Seq2SeqEncoder, span_start_encoder: Seq2SeqEncoder, span_end_encoder: Seq2SeqEncoder, initializer: InitializerApplicator, dropout: float = 0.2, num_context_answers: int = 0, marker_embedding_dim: int = 10, max_span_length: int = 30, max_turn_length: int = 12) -> None: super().__init__(vocab) self._num_context_answers = num_context_answers self._max_span_length = max_span_length self._text_field_embedder = text_field_embedder self._phrase_layer = phrase_layer self._marker_embedding_dim = marker_embedding_dim self._encoding_dim = phrase_layer.get_output_dim() self._matrix_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y') self._merge_atten = TimeDistributed(torch.nn.Linear(self._encoding_dim * 4, self._encoding_dim)) self._residual_encoder = residual_encoder if num_context_answers > 0: self._question_num_marker = torch.nn.Embedding(max_turn_length, marker_embedding_dim * num_context_answers) self._prev_ans_marker = torch.nn.Embedding((num_context_answers * 4) + 1, marker_embedding_dim) self._self_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y') self._followup_lin = torch.nn.Linear(self._encoding_dim, 3) self._merge_self_attention = TimeDistributed(torch.nn.Linear(self._encoding_dim * 3, self._encoding_dim)) self._span_start_encoder = span_start_encoder self._span_end_encoder = span_end_encoder self._span_start_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1)) self._span_end_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1)) self._span_yesno_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 3)) self._span_followup_predictor = TimeDistributed(self._followup_lin) check_dimensions_match(phrase_layer.get_input_dim(), text_field_embedder.get_output_dim() + marker_embedding_dim * num_context_answers, "phrase layer input dim", "embedding dim + marker dim * num context answers") initializer(self) self._span_start_accuracy = CategoricalAccuracy() self._span_end_accuracy = CategoricalAccuracy() self._span_yesno_accuracy = CategoricalAccuracy() self._span_followup_accuracy = CategoricalAccuracy() self._span_gt_yesno_accuracy = CategoricalAccuracy() self._span_gt_followup_accuracy = CategoricalAccuracy() self._span_accuracy = BooleanAccuracy() self._official_f1 = Average() self._variational_dropout = InputVariationalDropout(dropout)
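# Sketch of the 'x,y,x*y' combination used by LinearMatrixAttention above: each
# pairwise score is a learned linear function of the two vectors and their
# elementwise product. Illustrative code under that assumption, not the
# library internals:
import torch

def linear_attention_score(x: torch.Tensor, y: torch.Tensor, w: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    # x, y: (dim,); w: (3 * dim,); b: scalar bias
    combined = torch.cat([x, y, x * y], dim=-1)
    return w @ combined + b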
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, similarity_function: SimilarityFunction, projection_feedforward: FeedForward, inference_encoder: Seq2SeqEncoder, output_feedforward: FeedForward, output_logit: FeedForward, parser_model_path: str, parser_cuda_device: int, freeze_parser: bool, dropout: float = 0.5, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._encoder = encoder self._matrix_attention = LegacyMatrixAttention(similarity_function) self._projection_feedforward = projection_feedforward self._inference_encoder = inference_encoder if dropout: self.dropout = torch.nn.Dropout(dropout) self.rnn_input_dropout = InputVariationalDropout(dropout) else: self.dropout = None self.rnn_input_dropout = None self._output_feedforward = output_feedforward self._output_logit = output_logit self._num_labels = vocab.get_vocab_size(namespace="labels") check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(), "encoder output dim", "projection feedforward input") check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(), "proj feedforward output dim", "inference lstm input dim") self._accuracy = CategoricalAccuracy() self._loss = torch.nn.CrossEntropyLoss() self._parser = load_archive(parser_model_path, cuda_device=parser_cuda_device).model self._parser._head_sentinel.requires_grad = False for child in self._parser.children(): for param in child.parameters(): param.requires_grad = False if not freeze_parser: for param in self._parser.encoder.parameters(): param.requires_grad = True initializer(self)
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, tag_representation_dim: int, arc_representation_dim: int, tag_feedforward: FeedForward = None, arc_feedforward: FeedForward = None, pos_tag_embedding: Embedding = None, dropout: float = 0.0, input_dropout: float = 0.0, edge_prediction_threshold: float = 0.5, initializer: InitializerApplicator = InitializerApplicator(), **kwargs, ) -> None: super().__init__(vocab, **kwargs) self.text_field_embedder = text_field_embedder self.encoder = encoder self.edge_prediction_threshold = edge_prediction_threshold if not 0 < edge_prediction_threshold < 1: raise ConfigurationError( f"edge_prediction_threshold must be between " f"0 and 1 (exclusive) but found {edge_prediction_threshold}.") encoder_dim = encoder.get_output_dim() self.head_arc_feedforward = arc_feedforward or FeedForward( encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")()) self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward) self.arc_attention = BilinearMatrixAttention(arc_representation_dim, arc_representation_dim, use_input_biases=True) num_labels = self.vocab.get_vocab_size("labels") self.head_tag_feedforward = tag_feedforward or FeedForward( encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")()) self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward) self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim, tag_representation_dim, label_dim=num_labels) self._pos_tag_embedding = pos_tag_embedding or None self._dropout = InputVariationalDropout(dropout) self._input_dropout = Dropout(input_dropout) representation_dim = text_field_embedder.get_output_dim() if pos_tag_embedding is not None: representation_dim += pos_tag_embedding.get_output_dim() check_dimensions_match( representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim", ) check_dimensions_match( tag_representation_dim, self.head_tag_feedforward.get_output_dim(), "tag representation dim", "tag feedforward output dim", ) check_dimensions_match( arc_representation_dim, self.head_arc_feedforward.get_output_dim(), "arc representation dim", "arc feedforward output dim", ) self._unlabelled_f1 = F1Measure(positive_label=1) self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none") self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none") initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, stacked_encoder: Seq2SeqEncoder, span_feedforward: FeedForward, binary_feature_dim: int, max_span_width: int, binary_feature_size: int, distance_feature_size: int, ontology_path: str, embedding_dropout: float = 0.2, label_namespace: str = "labels", fast_mode: bool = True, loss_type: str = "logloss", initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(FrameSemanticRoleLabeler, self).__init__(vocab, regularizer) # Base token-level encoding. self.text_field_embedder = text_field_embedder self.embedding_dropout = Dropout(p=embedding_dropout) # There are exactly 2 binary features for the verb predicate embedding. self.binary_feature_embedding = Embedding(2, binary_feature_dim) self.stacked_encoder = stacked_encoder if text_field_embedder.get_output_dim() + binary_feature_dim != stacked_encoder.get_input_dim(): raise ConfigurationError("The SRL Model uses a binary verb indicator feature, meaning " "the input dimension of the stacked_encoder must be equal to " "the output dimension of the text_field_embedder plus the " "binary_feature_dim.") # Span-level encoding. self.max_span_width = max_span_width self.span_width_embedding = Embedding(max_span_width, binary_feature_size) # Based on the average sentence length in FN train. self.span_distance_bin = 25 self.span_distance_embedding = Embedding(self.span_distance_bin, distance_feature_size) self.span_direction_embedding = Embedding(2, binary_feature_size) self.span_feedforward = TimeDistributed(span_feedforward) self.head_scorer = TimeDistributed(torch.nn.Linear(stacked_encoder.get_output_dim(), 1)) self.num_classes = self.vocab.get_vocab_size(label_namespace) self.not_a_span_tag = self.vocab.get_token_index("*", label_namespace) self.outside_span_tag = self.vocab.get_token_index("O", label_namespace) self.semi_crf = SemiMarkovConditionalRandomField(num_tags=self.num_classes, max_span_width=max_span_width, default_tag=self.not_a_span_tag, outside_span_tag=self.outside_span_tag, loss_type=loss_type) # self.crf = ConditionalRandomField(self.num_classes) # Topmost MLP. self.tag_projection_layer = TimeDistributed(Linear(span_feedforward.get_output_dim(), self.num_classes)) # Evaluation. # For the span-based evaluation, we don't want to consider labels # for the outside span or for the dummy span, because FrameNet eval does not either. self.non_bio_span_metric = NonBioSpanBasedF1Measure(vocab, tag_namespace=label_namespace, ignore_classes=["O", "*"], ontology_path=ontology_path) # Mode for the model; if turned on, it only evaluates on dev and calculates loss for train. self.fast_mode = fast_mode initializer(self)
def __init__(self, vocab: Vocabulary, question_embedder: TextFieldEmbedder, action_embedding_dim: int, encoder: Seq2SeqEncoder, entity_encoder: Seq2VecEncoder, mixture_feedforward: FeedForward, decoder_beam_search: BeamSearch, max_decoding_steps: int, attention_function: SimilarityFunction, dropout: float = 0.0, num_linking_features: int = 8, rule_namespace: str = 'rule_labels', table_directory: str = '/wikitables/') -> None: super(WikiTablesSemanticParser, self).__init__(vocab) self._question_embedder = question_embedder self._encoder = encoder self._entity_encoder = TimeDistributed(entity_encoder) self._beam_search = decoder_beam_search self._max_decoding_steps = max_decoding_steps if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._rule_namespace = rule_namespace self._denotation_accuracy = WikiTablesAccuracy(table_directory) self._action_sequence_accuracy = Average() self._has_logical_form = Average() self._action_padding_index = -1 # the padding value used by IndexField self._action_embedder = Embedding(num_embeddings=vocab.get_vocab_size(self._rule_namespace), embedding_dim=action_embedding_dim) # This is what we pass as input in the first step of decoding, when we don't have a # previous action, or a previous question attention. self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim)) self._first_attended_question = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim())) torch.nn.init.normal_(self._first_action_embedding) torch.nn.init.normal_(self._first_attended_question) check_dimensions_match(entity_encoder.get_output_dim(), question_embedder.get_output_dim(), "entity word average embedding dim", "question embedding dim") self._num_entity_types = 4 # TODO(mattg): get this in a more principled way somehow? self._num_start_types = 5 # TODO(mattg): get this in a more principled way somehow? self._embedding_dim = question_embedder.get_output_dim() self._type_params = torch.nn.Linear(self._num_entity_types, self._embedding_dim) self._neighbor_params = torch.nn.Linear(self._embedding_dim, self._embedding_dim) if num_linking_features > 0: self._linking_params = torch.nn.Linear(num_linking_features, 1) else: self._linking_params = None self._question_entity_params = torch.nn.Linear(1, 1) self._question_neighbor_params = torch.nn.Linear(1, 1) self._decoder_trainer = MaximumMarginalLikelihood() self._decoder_step = WikiTablesDecoderStep(encoder_output_dim=self._encoder.get_output_dim(), action_embedding_dim=action_embedding_dim, attention_function=attention_function, num_start_types=self._num_start_types, num_entity_types=self._num_entity_types, mixture_feedforward=mixture_feedforward, dropout=dropout)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, label_namespace: str = "labels", feedforward: Optional[FeedForward] = None, label_encoding: Optional[str] = None, constraint_type: Optional[str] = None, include_start_end_transitions: bool = True, constrain_crf_decoding: bool = None, calculate_span_f1: bool = None, dropout: Optional[float] = None, verbose_metrics: bool = False, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self._verbose_metrics = verbose_metrics if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self._feedforward = feedforward if feedforward is not None: output_dim = feedforward.get_output_dim() else: output_dim = self.encoder.get_output_dim() self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags)) if constraint_type is not None: warnings.warn("'constraint_type' was removed and replaced with" "'label_encoding', 'constrain_crf_decoding', and " "'calculate_span_f1' in version 0.6.1. It will be " "removed in version 0.8.", DeprecationWarning) label_encoding = constraint_type # if constrain_crf_decoding and calculate_span_f1 are not # provided, (i.e., they're None), set them to True # if label_encoding is provided and False if it isn't. if constrain_crf_decoding is None: constrain_crf_decoding = label_encoding is not None if calculate_span_f1 is None: calculate_span_f1 = label_encoding is not None self.label_encoding = label_encoding if constrain_crf_decoding: if not label_encoding: raise ConfigurationError("constrain_crf_decoding is True, but " "no label_encoding was specified.") labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(label_encoding, labels) else: constraints = None self.include_start_end_transitions = include_start_end_transitions self.crf = ConditionalRandomField( self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions ) self.metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3) } self.calculate_span_f1 = calculate_span_f1 if calculate_span_f1: if not label_encoding: raise ConfigurationError("calculate_span_f1 is True, but " "no label_encoding was specified.") self._f1_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace, label_encoding=label_encoding) elif constraint_type is not None: # Maintain deprecated behavior if constraint_type is provided self._f1_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace, label_encoding=constraint_type) check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") if feedforward is not None: check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(), "encoder output dim", "feedforward input dim") initializer(self)
def __init__(self, vocab: Vocabulary, embedder: TextFieldEmbedder, modules, # TODO(dwadden) Add type. feature_size: int, max_span_width: int, target_task: str, feedforward_params: Dict[str, Union[int, float]], loss_weights: Dict[str, float], initializer: InitializerApplicator = InitializerApplicator(), module_initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, display_metrics: List[str] = None) -> None: super(DyGIE, self).__init__(vocab, regularizer) #################### # Create span extractor. self._endpoint_span_extractor = EndpointSpanExtractor( embedder.get_output_dim(), combination="x,y", num_width_embeddings=max_span_width, span_width_embedding_dim=feature_size, bucket_widths=False) #################### # Set parameters. self._embedder = embedder self._loss_weights = loss_weights self._max_span_width = max_span_width self._display_metrics = self._get_display_metrics(target_task) token_emb_dim = self._embedder.get_output_dim() span_emb_dim = self._endpoint_span_extractor.get_output_dim() #################### # Create submodules. modules = Params(modules) # Helper function to create feedforward networks. def make_feedforward(input_dim): return FeedForward(input_dim=input_dim, num_layers=feedforward_params["num_layers"], hidden_dims=feedforward_params["hidden_dims"], activations=torch.nn.ReLU(), dropout=feedforward_params["dropout"]) # Submodules self._ner = NERTagger.from_params(vocab=vocab, make_feedforward=make_feedforward, span_emb_dim=span_emb_dim, feature_size=feature_size, params=modules.pop("ner")) self._coref = CorefResolver.from_params(vocab=vocab, make_feedforward=make_feedforward, span_emb_dim=span_emb_dim, feature_size=feature_size, params=modules.pop("coref")) self._relation = RelationExtractor.from_params(vocab=vocab, make_feedforward=make_feedforward, span_emb_dim=span_emb_dim, feature_size=feature_size, params=modules.pop("relation")) self._events = EventExtractor.from_params(vocab=vocab, make_feedforward=make_feedforward, token_emb_dim=token_emb_dim, span_emb_dim=span_emb_dim, feature_size=feature_size, params=modules.pop("events")) #################### # Initialize text embedder and all submodules for module in [self._ner, self._coref, self._relation, self._events]: module_initializer(module) initializer(self)
def __init__(self, vocab: Vocabulary, source_embedder: TextFieldEmbedder, extra_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, extra_encoder: Seq2SeqEncoder, max_decoding_steps: int, beam_size: int = None, target_namespace: str = "tokens", target_embedding_dim: int = None, scheduled_sampling_ratio: float = 0., use_bleu: bool = True) -> None: super(InformedSeq2Seq, self).__init__(vocab) self._target_namespace = target_namespace self._scheduled_sampling_ratio = scheduled_sampling_ratio # We need the start symbol to provide as the input at the first timestep of decoding, and # end symbol as a way to indicate the end of the decoded sequence. self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace) self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace) if use_bleu: pad_index = self.vocab.get_token_index(self.vocab._padding_token, self._target_namespace) # pylint: disable=protected-access self._bleu = BLEU(exclude_indices={pad_index, self._end_index, self._start_index}) else: self._bleu = None # At prediction time, we use a beam search to find the most likely sequence of target tokens. beam_size = beam_size or 1 self._max_decoding_steps = max_decoding_steps self._beam_search = BeamSearch(self._end_index, max_steps=max_decoding_steps, beam_size=beam_size) # Dense embedding of source vocab tokens. self._source_embedder = source_embedder self._extra_embedder = extra_embedder # Encodes the sequence of source embeddings into a sequence of hidden states. self._encoder = encoder self._extra_encoder = extra_encoder num_classes = self.vocab.get_vocab_size(self._target_namespace) # Dense embedding of vocab words in the target space. # TODO: target_embedding_dim should be size of the concatenated vector target_embedding_dim = target_embedding_dim or source_embedder.get_output_dim() self._target_embedder = Embedding(num_classes, target_embedding_dim) # Decoder output dim needs to be the same as the encoder output dim since we initialize the # hidden state of the decoder with the final hidden state of the encoder. # TODO: encoder_output_dim should be size of the concatenated vector self._encoder_output_dim = self._encoder.get_output_dim() self._decoder_output_dim = self._encoder_output_dim self._decoder_input_dim = target_embedding_dim # We'll use an LSTM cell as the recurrent cell that produces a hidden state # for the decoder at each time step. # TODO (pradeep): Do not hardcode decoder cell type. self._decoder_cell = LSTMCell(self._decoder_input_dim, self._decoder_output_dim) # We project the hidden state from the decoder into the output vocabulary space # in order to get log probabilities of each target token, at each time step. self._output_projection_layer = Linear(self._decoder_output_dim, num_classes)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, embedding_dropout: float, pre_encode_feedforward: FeedForward, encoder: Seq2SeqEncoder, integrator: Seq2SeqEncoder, integrator_dropout: float, output_layer: Union[FeedForward, Maxout], elmo: Elmo = None, use_input_elmo: bool = False, use_integrator_output_elmo: bool = False, initializer: InitializerApplicator = InitializerApplicator(), **kwargs) -> None: super().__init__(vocab, **kwargs) self._text_field_embedder = text_field_embedder if "elmo" in self._text_field_embedder._token_embedders.keys(): raise ConfigurationError("To use ELMo in the BiattentiveClassificationNetwork input, " "remove elmo from the text_field_embedder and pass an " "Elmo object to the BiattentiveClassificationNetwork and set the " "'use_input_elmo' and 'use_integrator_output_elmo' flags accordingly.") self._embedding_dropout = nn.Dropout(embedding_dropout) self._num_classes = self.vocab.get_vocab_size("labels") self._pre_encode_feedforward = pre_encode_feedforward self._encoder = encoder self._integrator = integrator self._integrator_dropout = nn.Dropout(integrator_dropout) self._elmo = elmo self._use_input_elmo = use_input_elmo self._use_integrator_output_elmo = use_integrator_output_elmo self._num_elmo_layers = int(self._use_input_elmo) + int(self._use_integrator_output_elmo) # Check that, if elmo is None, none of the elmo flags are set. if self._elmo is None and self._num_elmo_layers != 0: raise ConfigurationError("One of 'use_input_elmo' or 'use_integrator_output_elmo' is True, " "but no Elmo object was provided upon construction. Pass in an Elmo " "object to use Elmo.") if self._elmo is not None: # Check that, if elmo is not None, we use it somewhere. if self._num_elmo_layers == 0: raise ConfigurationError("Elmo object provided upon construction, but both 'use_input_elmo' " "and 'use_integrator_output_elmo' are 'False'. Set one of them to " "'True' to use Elmo, or do not provide an Elmo object upon construction.") # Check that the number of flags set is equal to the num_output_representations of the Elmo object if len(self._elmo._scalar_mixes) != self._num_elmo_layers: raise ConfigurationError(f"Elmo object has num_output_representations={len(self._elmo._scalar_mixes)}, but this " f"does not match the number of use_*_elmo flags set to true. use_input_elmo " f"is {self._use_input_elmo}, and use_integrator_output_elmo " f"is {self._use_integrator_output_elmo}") # Calculate combined integrator output dim, taking into account elmo if self._use_integrator_output_elmo: self._combined_integrator_output_dim = (self._integrator.get_output_dim() + self._elmo.get_output_dim()) else: self._combined_integrator_output_dim = self._integrator.get_output_dim() self._self_attentive_pooling_projection = nn.Linear(self._combined_integrator_output_dim, 1) self._output_layer = output_layer if self._use_input_elmo: check_dimensions_match(text_field_embedder.get_output_dim() + self._elmo.get_output_dim(), self._pre_encode_feedforward.get_input_dim(), "text field embedder output dim + ELMo output dim", "Pre-encoder feedforward input dim") else: check_dimensions_match(text_field_embedder.get_output_dim(), self._pre_encode_feedforward.get_input_dim(), "text field embedder output dim", "Pre-encoder feedforward input dim") check_dimensions_match(self._pre_encode_feedforward.get_output_dim(), self._encoder.get_input_dim(), "Pre-encoder feedforward output dim", "Encoder input dim") check_dimensions_match(self._encoder.get_output_dim() * 3, self._integrator.get_input_dim(), "Encoder output dim * 3", "Integrator input dim") if self._use_integrator_output_elmo: check_dimensions_match(self._combined_integrator_output_dim * 4, self._output_layer.get_input_dim(), "(Integrator output dim + ELMo output dim) * 4", "Output layer input dim") else: check_dimensions_match(self._integrator.get_output_dim() * 4, self._output_layer.get_input_dim(), "Integrator output dim * 4", "Output layer input dim") check_dimensions_match(self._output_layer.get_output_dim(), self._num_classes, "Output layer output dim", "Number of classes.") self.metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3) } self.loss = torch.nn.CrossEntropyLoss() initializer(self)
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, context_layer: Seq2SeqEncoder, modules, # TODO(dwadden) Add type. feature_size: int, max_span_width: int, loss_weights: Dict[str, int], lexical_dropout: float = 0.2, lstm_dropout: float = 0.4, use_attentive_span_extractor: bool = False, co_train: bool = False, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, display_metrics: List[str] = None) -> None: super(DyGIE, self).__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._context_layer = context_layer self._loss_weights = loss_weights self._permanent_loss_weights = copy.deepcopy(self._loss_weights) # Need to add this line so things don't break. TODO(dwadden) sort out what's happening. modules = Params(modules) self._coref = CorefResolver.from_params(vocab=vocab, feature_size=feature_size, params=modules.pop("coref")) self._ner = NERTagger.from_params(vocab=vocab, feature_size=feature_size, params=modules.pop("ner")) self._relation = RelationExtractor.from_params( vocab=vocab, feature_size=feature_size, params=modules.pop("relation")) self._events = EventExtractor.from_params(vocab=vocab, feature_size=feature_size, params=modules.pop("events")) # Make endpoint span extractor. self._endpoint_span_extractor = EndpointSpanExtractor( context_layer.get_output_dim(), combination="x,y", num_width_embeddings=max_span_width, span_width_embedding_dim=feature_size, bucket_widths=False) if use_attentive_span_extractor: self._attentive_span_extractor = SelfAttentiveSpanExtractor( input_dim=text_field_embedder.get_output_dim()) else: self._attentive_span_extractor = None self._max_span_width = max_span_width self._display_metrics = display_metrics if lexical_dropout > 0: self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout) else: self._lexical_dropout = lambda x: x # Do co-training if we're training on ACE and ontonotes. self._co_train = co_train # Big gotcha: PyTorch doesn't add dropout to the LSTM's output layer. We need to do this # manually. if lstm_dropout > 0: self._lstm_dropout = torch.nn.Dropout(p=lstm_dropout) else: self._lstm_dropout = lambda x: x initializer(self)
def __init__(self, vocab: Vocabulary, encoder: Seq2SeqEncoder, entity_encoder: Seq2VecEncoder, decoder_beam_search: BeamSearch, question_embedder: TextFieldEmbedder, input_attention: Attention, past_attention: Attention, max_decoding_steps: int, action_embedding_dim: int, gnn: bool = True, decoder_use_graph_entities: bool = True, decoder_self_attend: bool = True, gnn_timesteps: int = 2, parse_sql_on_decoding: bool = True, add_action_bias: bool = True, use_neighbor_similarity_for_linking: bool = True, dataset_path: str = 'dataset', training_beam_size: int = None, decoder_num_layers: int = 1, dropout: float = 0.0, rule_namespace: str = 'rule_labels', scoring_dev_params: dict = None, debug_parsing: bool = False) -> None: super().__init__(vocab) self.vocab = vocab self._encoder = encoder self._max_decoding_steps = max_decoding_steps if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._rule_namespace = rule_namespace self._question_embedder = question_embedder self._add_action_bias = add_action_bias self._scoring_dev_params = scoring_dev_params or {} self.parse_sql_on_decoding = parse_sql_on_decoding self._entity_encoder = TimeDistributed(entity_encoder) self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking self._self_attend = decoder_self_attend self._decoder_use_graph_entities = decoder_use_graph_entities self._action_padding_index = -1 # the padding value used by IndexField self._exact_match = Average() self._sql_evaluator_match = Average() self._action_similarity = Average() self._acc_single = Average() self._acc_multi = Average() self._beam_hit = Average() self._action_embedding_dim = action_embedding_dim num_actions = vocab.get_vocab_size(self._rule_namespace) if self._add_action_bias: input_action_dim = action_embedding_dim + 1 else: input_action_dim = action_embedding_dim self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=input_action_dim) self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) encoder_output_dim = encoder.get_output_dim() if gnn: encoder_output_dim += action_embedding_dim self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim)) self._first_attended_utterance = torch.nn.Parameter(torch.FloatTensor(encoder_output_dim)) self._first_attended_output = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim)) torch.nn.init.normal_(self._first_action_embedding) torch.nn.init.normal_(self._first_attended_utterance) torch.nn.init.normal_(self._first_attended_output) self._num_entity_types = 9 self._embedding_dim = question_embedder.get_output_dim() self._entity_type_encoder_embedding = Embedding(self._num_entity_types, self._embedding_dim) self._entity_type_decoder_embedding = Embedding(self._num_entity_types, action_embedding_dim) self._linking_params = torch.nn.Linear(16, 1) torch.nn.init.uniform_(self._linking_params.weight, 0, 1) num_edge_types = 3 self._gnn = GatedGraphConv(self._embedding_dim, gnn_timesteps, num_edge_types=num_edge_types, dropout=dropout) self._decoder_num_layers = decoder_num_layers self._beam_search = decoder_beam_search self._decoder_trainer = MaximumMarginalLikelihood(training_beam_size) if decoder_self_attend: self._transition_function = AttendPastSchemaItemsTransitionFunction(encoder_output_dim=encoder_output_dim, action_embedding_dim=action_embedding_dim, input_attention=input_attention, past_attention=past_attention, predict_start_type_separately=False, add_action_bias=self._add_action_bias, dropout=dropout, num_layers=self._decoder_num_layers) else: self._transition_function = LinkingTransitionFunction(encoder_output_dim=encoder_output_dim, action_embedding_dim=action_embedding_dim, input_attention=input_attention, predict_start_type_separately=False, add_action_bias=self._add_action_bias, dropout=dropout, num_layers=self._decoder_num_layers) self._ent2ent_ff = FeedForward(action_embedding_dim, 1, action_embedding_dim, Activation.by_name('relu')()) self._neighbor_params = torch.nn.Linear(self._embedding_dim, self._embedding_dim) # TODO: Remove hard-coded dirs self._evaluate_func = partial(evaluate, db_dir=os.path.join(dataset_path, 'database'), table=os.path.join(dataset_path, 'tables.json'), check_valid=False) self.debug_parsing = debug_parsing
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder = None, label_namespace: str = "labels", feedforward: Optional[FeedForward] = None, include_start_end_transitions: bool = True, dropout: Optional[float] = None, use_upos_constraints: bool = True, use_lemma_constraints: bool = True, train_with_constraints: bool = True, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.train_with_constraints = train_with_constraints self.encoder = encoder if self.encoder is not None: encoder_output_dim = self.encoder.get_output_dim() else: encoder_output_dim = self.text_field_embedder.get_output_dim() if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self.feedforward = feedforward if feedforward is not None: output_dim = feedforward.get_output_dim() else: output_dim = encoder_output_dim self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags)) self._label_namespace = label_namespace labels = self.vocab.get_index_to_token_vocabulary(self._label_namespace) constraints = streusle_allowed_transitions(labels) self.use_upos_constraints = use_upos_constraints self.use_lemma_constraints = use_lemma_constraints if self.use_lemma_constraints and not self.use_upos_constraints: raise ConfigurationError("If lemma constraints are applied, UPOS constraints must be applied as well.") if self.use_upos_constraints: # Get a dict with a mapping from UPOS to allowed LEXCAT here. self._upos_to_allowed_lexcats: Dict[str, Set[str]] = get_upos_allowed_lexcats( stronger_constraints=self.use_lemma_constraints) # Dict with a amapping from UPOS to dictionary of [UPOS, list of additionally allowed LEXCATS] self._lemma_to_allowed_lexcats: Dict[str, Dict[str, List[str]]] = get_lemma_allowed_lexcats() # Use labels and the upos_to_allowed_lexcats to get a dict with # a mapping from UPOS to a mask with 1 at allowed label indices and 0 at # disallowed label indices. self._upos_to_label_mask: Dict[str, torch.Tensor] = {} for upos in ALL_UPOS: # Shape: (num_labels,) upos_label_mask = torch.zeros(len(labels), device=next(self.tag_projection_layer.parameters()).device) # Go through the labels and indices and fill in the values that are allowed. for label_index, label in labels.items(): if len(label.split("-")) == 1: upos_label_mask[label_index] = 1 continue label_lexcat = label.split("-")[1] if not label.startswith("O-") and not label.startswith("o-"): # Label does not start with O-/o-, always allowed. upos_label_mask[label_index] = 1 elif label_lexcat in self._upos_to_allowed_lexcats[upos]: # Label starts with O-/o-, but the lexcat is in allowed # lexcats for the current upos. upos_label_mask[label_index] = 1 self._upos_to_label_mask[upos] = upos_label_mask # Use labels and the lemma_to_allowed_lexcats to get a dict with # a mapping from lemma to a mask with 1 at an _additionally_ allowed label index # and 0 at disallowed label indices. If lemma_to_label_mask has a 0, and upos_to_label_mask # has a 0, the lexcat is not allowed for the (upos, lemma). If either lemma_to_label_mask or # upos_to_label_mask has a 1, the lexcat is allowed for the (upos, lemma) pair. 
self._lemma_upos_to_label_mask: Dict[Tuple[str, str], torch.Tensor] = {} for lemma in SPECIAL_LEMMAS: for upos_tag in ALL_UPOS: # No additional constraints, should be all zero if upos_tag not in self._lemma_to_allowed_lexcats[lemma]: continue # Shape: (num_labels,) lemma_upos_label_mask = torch.zeros(len(labels), device=next(self.tag_projection_layer.parameters()).device) # Go through the labels and indices and fill in the values that are allowed. for label_index, label in labels.items(): # For ~i, etc. tags. We don't deal with them here. if len(label.split("-")) == 1: continue label_lexcat = label.split("-")[1] if not label.startswith("O-") and not label.startswith("o-"): # Label does not start with O-/o-, so we don't deal with it here continue if label_lexcat in self._lemma_to_allowed_lexcats[lemma][upos_tag]: # Label starts with O-/o-, but the lexcat is in allowed # lexcats for the current upos. lemma_upos_label_mask[label_index] = 1 self._lemma_upos_to_label_mask[(lemma, upos_tag)] = lemma_upos_label_mask self.include_start_end_transitions = include_start_end_transitions self.crf = ConditionalRandomField( self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions) self.accuracy_metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3) } self.streuseval_metric = Streuseval() if encoder is not None: check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") if feedforward is not None: check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(), "encoder output dim", "feedforward input dim") initializer(self)
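# A minimal sketch (not this model's actual forward code) of how the per-UPOS
# label masks built above can be applied: logits of disallowed tags are pushed
# toward -inf before CRF decoding so constrained tags are never selected.
# `upos_tags` (one UPOS string per token) and the fill value are assumptions
# for illustration.
from typing import Dict, List
import torch

def mask_logits_by_upos(logits: torch.Tensor,
                        upos_tags: List[str],
                        upos_to_label_mask: Dict[str, torch.Tensor]) -> torch.Tensor:
    # logits: (sequence_length, num_labels) for a single sentence.
    masked = logits.clone()
    for i, upos in enumerate(upos_tags):
        allowed = upos_to_label_mask[upos]  # (num_labels,), 1 = allowed
        masked[i] = masked[i].masked_fill(allowed == 0, -1e7)
    return masked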
def __init__(self, vocab: Vocabulary, question_embedder: TextFieldEmbedder, action_embedding_dim: int, encoder: Seq2SeqEncoder, decoder_beam_search: BeamSearch, max_decoding_steps: int, attention: Attention, mixture_feedforward: FeedForward = None, add_action_bias: bool = True, dropout: float = 0.0, num_linking_features: int = 0, num_entity_bits: int = 0, entity_bits_output: bool = True, use_entities: bool = False, denotation_only: bool = False, # Deprecated parameter to load older models entity_encoder: Seq2VecEncoder = None, # pylint: disable=unused-argument entity_similarity_mode: str = "dot_product", rule_namespace: str = 'rule_labels') -> None: super(QuarelSemanticParser, self).__init__(vocab) self._question_embedder = question_embedder self._encoder = encoder self._beam_search = decoder_beam_search self._max_decoding_steps = max_decoding_steps if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._rule_namespace = rule_namespace self._denotation_accuracy = Average() self._action_sequence_accuracy = Average() self._has_logical_form = Average() self._embedding_dim = question_embedder.get_output_dim() self._use_entities = use_entities # Note: there's only one non-trivial entity type in QuaRel for now, so most of the # entity_type stuff is irrelevant self._num_entity_types = 4 # TODO(mattg): get this in a more principled way somehow? self._num_start_types = 1 # Hardcoded until we feed lf syntax into the model self._entity_type_encoder_embedding = Embedding(self._num_entity_types, self._embedding_dim) self._entity_type_decoder_embedding = Embedding(self._num_entity_types, action_embedding_dim) self._entity_similarity_layer = None self._entity_similarity_mode = entity_similarity_mode if self._entity_similarity_mode == "weighted_dot_product": self._entity_similarity_layer = \ TimeDistributed(torch.nn.Linear(self._embedding_dim, 1, bias=False)) # Center initial values around unweighted dot product self._entity_similarity_layer._module.weight.data += 1 # pylint: disable=protected-access elif self._entity_similarity_mode == "dot_product": pass else: raise ValueError("Invalid entity_similarity_mode: {}".format(self._entity_similarity_mode)) if num_linking_features > 0: self._linking_params = torch.nn.Linear(num_linking_features, 1) else: self._linking_params = None self._decoder_trainer = MaximumMarginalLikelihood() self._encoder_output_dim = self._encoder.get_output_dim() if entity_bits_output: self._encoder_output_dim += num_entity_bits self._entity_bits_output = entity_bits_output self._debug_count = 10 self._num_denotation_cats = 2 # Hardcoded for simplicity self._denotation_only = denotation_only if self._denotation_only: self._denotation_accuracy_cat = CategoricalAccuracy() self._denotation_classifier = torch.nn.Linear(self._encoder_output_dim, self._num_denotation_cats) # Rest of init not needed for denotation only where no decoding to actions needed return self._action_padding_index = -1 # the padding value used by IndexField num_actions = vocab.get_vocab_size(self._rule_namespace) self._num_actions = num_actions self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) # We are tying the action embeddings used for input and output # self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) self._output_action_embedder = self._action_embedder # tied weights self._add_action_bias = add_action_bias if self._add_action_bias: self._action_biases = 
Embedding(num_embeddings=num_actions, embedding_dim=1) # This is what we pass as input in the first step of decoding, when we don't have a # previous action, or a previous question attention. self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim)) self._first_attended_question = torch.nn.Parameter(torch.FloatTensor(self._encoder_output_dim)) torch.nn.init.normal_(self._first_action_embedding) torch.nn.init.normal_(self._first_attended_question) self._decoder_step = LinkingTransitionFunction(encoder_output_dim=self._encoder_output_dim, action_embedding_dim=action_embedding_dim, input_attention=attention, num_start_types=self._num_start_types, predict_start_type_separately=False, add_action_bias=self._add_action_bias, mixture_feedforward=mixture_feedforward, dropout=dropout)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, num_highway_layers: int, phrase_layer: Seq2SeqEncoder, matrix_attention_layer: MatrixAttention, modeling_layer: Seq2SeqEncoder, dropout_prob: float = 0.1, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) text_embed_dim = text_field_embedder.get_output_dim() encoding_in_dim = phrase_layer.get_input_dim() encoding_out_dim = phrase_layer.get_output_dim() modeling_in_dim = modeling_layer.get_input_dim() modeling_out_dim = modeling_layer.get_output_dim() self._text_field_embedder = text_field_embedder self._embedding_proj_layer = torch.nn.Linear(text_embed_dim, encoding_in_dim) self._highway_layer = Highway(encoding_in_dim, num_highway_layers) self._encoding_proj_layer = torch.nn.Linear(encoding_in_dim, encoding_in_dim) self._phrase_layer = phrase_layer self._matrix_attention = matrix_attention_layer self._modeling_proj_layer = torch.nn.Linear(encoding_out_dim * 4, modeling_in_dim) self._modeling_layer = modeling_layer self._span_start_predictor = torch.nn.Linear(modeling_out_dim * 2, 1) self._span_end_predictor = torch.nn.Linear(modeling_out_dim * 2, 1) self._span_start_accuracy = CategoricalAccuracy() self._span_end_accuracy = CategoricalAccuracy() self._span_accuracy = BooleanAccuracy() self._metrics = SquadEmAndF1() self._dropout = torch.nn.Dropout( p=dropout_prob) if dropout_prob > 0 else lambda x: x # evaluation # BLEU self._bleu_score_types_to_use = ["BLEU1", "BLEU2", "BLEU3", "BLEU4"] self._bleu_scores = { x: Average() for x in self._bleu_score_types_to_use } # ROUGE using pyrouge self._rouge_score_types_to_use = ['rouge-n', 'rouge-l', 'rouge-w'] # if we use rouge-n as a metric we actually get n scores: rouge-1, rouge-2, ..., rouge-n max_rouge_n = 4 rouge_n_metrics = [] if "rouge-n" in self._rouge_score_types_to_use: rouge_n_metrics = [ "rouge-{0}".format(x) for x in range(1, max_rouge_n + 1) ] rouge_scores_names = rouge_n_metrics + [ y for y in self._rouge_score_types_to_use if y != 'rouge-n' ] self._rouge_scores = {x: Average() for x in rouge_scores_names} self._rouge_evaluator = rouge.Rouge( metrics=self._rouge_score_types_to_use, max_n=max_rouge_n, limit_length=True, length_limit=100, length_limit_type='words', apply_avg=False, apply_best=False, alpha=0.5, # Default F1_score weight_factor=1.2, stemming=True) initializer(self)
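# A brief usage sketch for the evaluator configured above, assuming py-rouge's
# `Rouge.get_scores(hypotheses, references)` API and a hypothetical
# prediction/reference pair; the model's real bookkeeping happens in its
# decode/metric-update code.
hypotheses = ['the team won the final match']
references = ['the team won the championship match']
scores = self._rouge_evaluator.get_scores(hypotheses, references)
# With apply_avg=False and apply_best=False each metric maps to one score dict
# per hypothesis; feed the F-scores into the running Average metrics.
for metric_name, per_hypothesis in scores.items():
    if metric_name in self._rouge_scores:
        for hyp_scores in per_hypothesis:
            self._rouge_scores[metric_name](hyp_scores['f'][0])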
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, num_highway_layers: int, phrase_layer: Seq2SeqEncoder, matrix_attention_layer: MatrixAttention, modeling_layer: Seq2SeqEncoder, dropout_prob: float = 0.1, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, answering_abilities: List[str] = None) -> None: super().__init__(vocab, regularizer) if answering_abilities is None: self.answering_abilities = [ "passage_span_extraction", "question_span_extraction", "addition_subtraction", "counting" ] else: self.answering_abilities = answering_abilities text_embed_dim = text_field_embedder.get_output_dim() encoding_in_dim = phrase_layer.get_input_dim() encoding_out_dim = phrase_layer.get_output_dim() modeling_in_dim = modeling_layer.get_input_dim() modeling_out_dim = modeling_layer.get_output_dim() self._text_field_embedder = text_field_embedder self._embedding_proj_layer = torch.nn.Linear(text_embed_dim, encoding_in_dim) self._highway_layer = Highway(encoding_in_dim, num_highway_layers) self._encoding_proj_layer = torch.nn.Linear(encoding_in_dim, encoding_in_dim) self._phrase_layer = phrase_layer self._matrix_attention = matrix_attention_layer self._modeling_proj_layer = torch.nn.Linear(encoding_out_dim * 4, modeling_in_dim) self._modeling_layer = modeling_layer self._passage_weights_predictor = torch.nn.Linear(modeling_out_dim, 1) self._question_weights_predictor = torch.nn.Linear(encoding_out_dim, 1) if len(self.answering_abilities) > 1: self._answer_ability_predictor = FeedForward( modeling_out_dim + encoding_out_dim, activations=[ Activation.by_name('relu')(), Activation.by_name('linear')() ], hidden_dims=[modeling_out_dim, len(self.answering_abilities)], num_layers=2, dropout=dropout_prob) if "passage_span_extraction" in self.answering_abilities: self._passage_span_extraction_index = self.answering_abilities.index( "passage_span_extraction") self._passage_span_start_predictor = FeedForward( modeling_out_dim * 2, activations=[ Activation.by_name('relu')(), Activation.by_name('linear')() ], hidden_dims=[modeling_out_dim, 1], num_layers=2) self._passage_span_end_predictor = FeedForward( modeling_out_dim * 2, activations=[ Activation.by_name('relu')(), Activation.by_name('linear')() ], hidden_dims=[modeling_out_dim, 1], num_layers=2) if "question_span_extraction" in self.answering_abilities: self._question_span_extraction_index = self.answering_abilities.index( "question_span_extraction") self._question_span_start_predictor = FeedForward( modeling_out_dim * 2, activations=[ Activation.by_name('relu')(), Activation.by_name('linear')() ], hidden_dims=[modeling_out_dim, 1], num_layers=2) self._question_span_end_predictor = FeedForward( modeling_out_dim * 2, activations=[ Activation.by_name('relu')(), Activation.by_name('linear')() ], hidden_dims=[modeling_out_dim, 1], num_layers=2) if "addition_subtraction" in self.answering_abilities: self._addition_subtraction_index = self.answering_abilities.index( "addition_subtraction") self._number_sign_predictor = FeedForward( modeling_out_dim * 3, activations=[ Activation.by_name('relu')(), Activation.by_name('linear')() ], hidden_dims=[modeling_out_dim, 3], num_layers=2) if "counting" in self.answering_abilities: self._counting_index = self.answering_abilities.index("counting") self._count_number_predictor = FeedForward( modeling_out_dim, activations=[ Activation.by_name('relu')(), Activation.by_name('linear')() ], hidden_dims=[modeling_out_dim, 10], num_layers=2) self._drop_metrics =
DropEmAndF1() self._dropout = torch.nn.Dropout(p=dropout_prob) initializer(self)
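# A hedged sketch of how the answer-ability head above is typically consumed:
# a log-softmax over the configured abilities picks which answer module to
# trust per instance. `passage_vector` and `question_vector` are assumed
# pooled representations; this is illustrative, not the model's exact forward pass.
answer_ability_logits = self._answer_ability_predictor(
    torch.cat([passage_vector, question_vector], dim=-1))
answer_ability_log_probs = torch.nn.functional.log_softmax(answer_ability_logits, dim=-1)
best_answer_ability = torch.argmax(answer_ability_log_probs, dim=1)  # (batch_size,)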
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, num_highway_layers: int, phrase_layer: Seq2SeqEncoder, similarity_function: SimilarityFunction, modeling_layer: Seq2SeqEncoder, span_end_encoder: Seq2SeqEncoder, dropout: float = 0.2, mask_lstms: bool = True, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(BidirectionalAttentionFlowBasic, self).__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(), num_highway_layers)) self._phrase_layer = phrase_layer self._matrix_attention = LegacyMatrixAttention(similarity_function) self._modeling_layer = modeling_layer self._span_end_encoder = span_end_encoder encoding_dim = phrase_layer.get_output_dim() modeling_dim = modeling_layer.get_output_dim() span_start_input_dim = encoding_dim * 4 + modeling_dim self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1)) span_end_encoding_dim = span_end_encoder.get_output_dim() span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1)) # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily # obvious from the configuration files, so we check here. check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim, "modeling layer input dim", "4 * encoding dim") check_dimensions_match(text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(), "text field embedder output dim", "phrase layer input dim") check_dimensions_match(span_end_encoder.get_input_dim(), 4 * encoding_dim + 3 * modeling_dim, "span end encoder input dim", "4 * encoding dim + 3 * modeling dim") self._span_start_accuracy = CategoricalAccuracy() self._span_end_accuracy = CategoricalAccuracy() self._span_accuracy = BooleanAccuracy() self._squad_metrics = SquadEmAndF1() if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._mask_lstms = mask_lstms # evaluation # BLEU self._bleu_score_types_to_use = ["BLEU1", "BLEU2", "BLEU3", "BLEU4"] self._bleu_scores = {x: Average() for x in self._bleu_score_types_to_use} # ROUGE using pyrouge self._rouge_score_types_to_use = ['rouge-n', 'rouge-l', 'rouge-w'] # if we use rouge-n as a metric we actually get n scores: rouge-1, rouge-2, ..., rouge-n max_rouge_n = 4 rouge_n_metrics = [] if "rouge-n" in self._rouge_score_types_to_use: rouge_n_metrics = ["rouge-{0}".format(x) for x in range(1, max_rouge_n + 1)] rouge_scores_names = rouge_n_metrics + [y for y in self._rouge_score_types_to_use if y != 'rouge-n'] self._rouge_scores = {x: Average() for x in rouge_scores_names} self._rouge_evaluator = rouge.Rouge(metrics=self._rouge_score_types_to_use, max_n=max_rouge_n, limit_length=True, length_limit=100, length_limit_type='words', apply_avg=False, apply_best=False, alpha=0.5, # Default F1_score weight_factor=1.2, stemming=True) initializer(self)
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, label_namespace: str = "labels", feedforward: Optional[FeedForward] = None, label_encoding: Optional[str] = None, include_start_end_transitions: bool = True, constrain_crf_decoding: bool = None, calculate_span_f1: bool = None, dropout: Optional[float] = None, verbose_metrics: bool = False, initializer: InitializerApplicator = InitializerApplicator(), top_k: int = 1, **kwargs, ) -> None: super().__init__(vocab, **kwargs) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self.top_k = top_k self._verbose_metrics = verbose_metrics if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self._feedforward = feedforward if feedforward is not None: output_dim = feedforward.get_output_dim() else: output_dim = self.encoder.get_output_dim() self.tag_projection_layer = TimeDistributed( Linear(output_dim, self.num_tags)) # if constrain_crf_decoding and calculate_span_f1 are not # provided, (i.e., they're None), set them to True # if label_encoding is provided and False if it isn't. if constrain_crf_decoding is None: constrain_crf_decoding = label_encoding is not None if calculate_span_f1 is None: calculate_span_f1 = label_encoding is not None self.label_encoding = label_encoding if constrain_crf_decoding: if not label_encoding: raise ConfigurationError( "constrain_crf_decoding is True, but no label_encoding was specified." ) labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(label_encoding, labels) else: constraints = None self.include_start_end_transitions = include_start_end_transitions self.crf = ConditionalRandomField( self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions) self.metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3), } self.calculate_span_f1 = calculate_span_f1 if calculate_span_f1: if not label_encoding: raise ConfigurationError( "calculate_span_f1 is True, but no label_encoding was specified." ) self._f1_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace, label_encoding=label_encoding) check_dimensions_match( text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim", ) if feedforward is not None: check_dimensions_match( encoder.get_output_dim(), feedforward.get_input_dim(), "encoder output dim", "feedforward input dim", ) initializer(self)
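# A short usage sketch for the CRF configured above: the summed log-likelihood
# is negated to form the training loss, and Viterbi decoding returns the top-k
# tag sequences. `logits` (batch_size, seq_len, num_tags), `tags`, and `mask`
# are assumed to come from the tag projection layer and the text field.
log_likelihood = self.crf(logits, tags, mask)
loss = -log_likelihood
best_paths = self.crf.viterbi_tags(logits, mask, top_k=self.top_k)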
def __init__( self, vocab: Vocabulary, source_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, max_decoding_steps: int, attention: Attention = None, attention_function: SimilarityFunction = None, beam_size: int = None, target_namespace: str = "tokens", target_embedding_dim: int = None, scheduled_sampling_ratio: float = 0.0, use_bleu: bool = True, bleu_ngram_weights: Iterable[float] = (0.25, 0.25, 0.25, 0.25), ) -> None: super().__init__(vocab) self._target_namespace = target_namespace self._scheduled_sampling_ratio = scheduled_sampling_ratio # We need the start symbol to provide as the input at the first timestep of decoding, and # end symbol as a way to indicate the end of the decoded sequence. self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace) self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace) if use_bleu: pad_index = self.vocab.get_token_index(self.vocab._padding_token, self._target_namespace) self._bleu = BLEU(bleu_ngram_weights, exclude_indices={ pad_index, self._end_index, self._start_index }) else: self._bleu = None # At prediction time, we use a beam search to find the most likely sequence of target tokens. beam_size = beam_size or 1 self._max_decoding_steps = max_decoding_steps self._beam_search = BeamSearch(self._end_index, max_steps=max_decoding_steps, beam_size=beam_size) # Dense embedding of source vocab tokens. self._source_embedder = source_embedder # Encodes the sequence of source embeddings into a sequence of hidden states. self._encoder = encoder num_classes = self.vocab.get_vocab_size(self._target_namespace) # Attention mechanism applied to the encoder output for each step. if attention: if attention_function: raise ConfigurationError( "You can only specify an attention module or an " "attention function, but not both.") self._attention = attention elif attention_function: self._attention = LegacyAttention(attention_function) else: self._attention = None # Dense embedding of vocab words in the target space. target_embedding_dim = target_embedding_dim or source_embedder.get_output_dim( ) self._target_embedder = Embedding(num_embeddings=num_classes, embedding_dim=target_embedding_dim) # Decoder output dim needs to be the same as the encoder output dim since we initialize the # hidden state of the decoder with the final hidden state of the encoder. self._encoder_output_dim = self._encoder.get_output_dim() self._decoder_output_dim = self._encoder_output_dim if self._attention: # If using attention, a weighted average over encoder outputs will be concatenated # to the previous target embedding to form the input to the decoder at each # time step. self._decoder_input_dim = self._decoder_output_dim + target_embedding_dim else: # Otherwise, the input to the decoder is just the previous target embedding. self._decoder_input_dim = target_embedding_dim # We'll use an LSTM cell as the recurrent cell that produces a hidden state # for the decoder at each time step. # TODO (pradeep): Do not hardcode decoder cell type. self._decoder_cell = LSTMCell(self._decoder_input_dim, self._decoder_output_dim) # We project the hidden state from the decoder into the output vocabulary space # in order to get log probabilities of each target token, at each time step. self._output_projection_layer = Linear(self._decoder_output_dim, num_classes)
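# A minimal sketch of one decoder step wiring together the pieces built above
# (target embedding, optional attention over encoder outputs, LSTMCell, output
# projection). Tensor names are assumptions; the real decoding loop also
# handles scheduled sampling and beam search.
def _decoder_step_sketch(self, last_predictions, decoder_hidden, decoder_context,
                         encoder_outputs, source_mask):
    embedded_input = self._target_embedder(last_predictions)  # (batch, target_embedding_dim)
    if self._attention:
        # Weighted average over encoder outputs, concatenated to the input.
        input_weights = self._attention(decoder_hidden, encoder_outputs, source_mask)
        attended_input = torch.bmm(input_weights.unsqueeze(1), encoder_outputs).squeeze(1)
        decoder_input = torch.cat((attended_input, embedded_input), dim=-1)
    else:
        decoder_input = embedded_input
    decoder_hidden, decoder_context = self._decoder_cell(
        decoder_input, (decoder_hidden, decoder_context))
    # Scores over the target vocabulary come from the output projection.
    return self._output_projection_layer(decoder_hidden), decoder_hidden, decoder_context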
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, title_text_projection: FeedForward, abstract_text_projection: FeedForward, title_text_encoder: Seq2SeqEncoder, abstract_text_encoder: Seq2SeqEncoder, bi_attention_encoder: BiAttentionEncoder, classifier_feedforward: Union[FeedForward, Maxout], bce_pos_weight: int = 10, use_positional_encoding: bool = False, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(EtdBCN, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.num_classes = self.vocab.get_vocab_size("labels") self.title_text_projection = title_text_projection self.abstract_text_projection = abstract_text_projection self.title_text_encoder = title_text_encoder self.abstract_text_encoder = abstract_text_encoder self.bi_attention_encoder = bi_attention_encoder self.classifier_feedforward = classifier_feedforward self.use_positional_encoding = use_positional_encoding if text_field_embedder.get_output_dim( ) != title_text_projection.get_input_dim(): raise ConfigurationError( "The output dimension of the text_field_embedder must match the " "input dimension of the title_text_projection. Found {} and {}, " "respectively.".format(text_field_embedder.get_output_dim(), title_text_projection.get_input_dim())) if text_field_embedder.get_output_dim( ) != abstract_text_projection.get_input_dim(): raise ConfigurationError( "The output dimension of the text_field_embedder must match the " "input dimension of the abstract_text_projection. Found {} and {}, " "respectively.".format( text_field_embedder.get_output_dim(), abstract_text_projection.get_input_dim())) if title_text_projection.get_output_dim( ) != title_text_encoder.get_input_dim(): raise ConfigurationError( "The output dimension of the title_text_projection must match the " "input dimension of the title_text_encoder. Found {} and {}, " "respectively.".format(title_text_projection.get_output_dim(), title_text_encoder.get_input_dim())) if abstract_text_projection.get_output_dim( ) != abstract_text_encoder.get_input_dim(): raise ConfigurationError( "The output dimension of the abstract_text_projection must match the " "input dimension of the abstract_text_encoder. Found {} and {}, " "respectively.".format( abstract_text_projection.get_output_dim(), abstract_text_encoder.get_input_dim())) self.metrics = { # "roc_auc_score": RocAucScore() "hit_5": HitAtK(5), "hit_10": HitAtK(10), # "hit_100": HitAtK(100), # "marco_f1": MacroF1Measure(top_k=5,num_label=self.num_classes) } self.loss = torch.nn.BCEWithLogitsLoss( pos_weight=torch.ones(self.num_classes) * bce_pos_weight) initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, tag_representation_dim: int, arc_representation_dim: int, tag_feedforward: FeedForward = None, arc_feedforward: FeedForward = None, pos_tag_embedding: Embedding = None, use_mst_decoding_for_validation: bool = True, dropout: float = 0.0, input_dropout: float = 0.0, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(BiaffineDependencyParser, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.encoder = encoder encoder_dim = encoder.get_output_dim() self.head_arc_feedforward = arc_feedforward or \ FeedForward(encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")()) self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward) self.arc_attention = BilinearMatrixAttention(arc_representation_dim, arc_representation_dim, use_input_biases=True) num_labels = self.vocab.get_vocab_size("head_tags") self.head_tag_feedforward = tag_feedforward or \ FeedForward(encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")()) self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward) self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim, tag_representation_dim, num_labels) self._pos_tag_embedding = pos_tag_embedding or None self._dropout = InputVariationalDropout(dropout) self._input_dropout = Dropout(input_dropout) self._head_sentinel = torch.nn.Parameter( torch.randn([1, 1, encoder.get_output_dim()])) representation_dim = text_field_embedder.get_output_dim() if pos_tag_embedding is not None: representation_dim += pos_tag_embedding.get_output_dim() check_dimensions_match(representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim") check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(), "tag representation dim", "tag feedforward output dim") check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(), "arc representation dim", "arc feedforward output dim") self.use_mst_decoding_for_validation = use_mst_decoding_for_validation tags = self.vocab.get_token_to_index_vocabulary("pos") punctuation_tag_indices = { tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE } self._pos_to_ignore = set(punctuation_tag_indices.values()) logger.info( f"Found POS tags corresponding to the following punctuation: {punctuation_tag_indices}. " "Ignoring words with these POS tags for evaluation.") self._attachment_scores = AttachmentScores() initializer(self)
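# A hedged sketch of how the arc modules above combine in the forward pass:
# the head/child feedforwards project the encoded text, and the bilinear
# attention yields a (batch, seq_len, seq_len) matrix of arc scores.
# `encoded_text` is an assumed output of self.encoder.
head_arc_representation = self._dropout(self.head_arc_feedforward(encoded_text))
child_arc_representation = self._dropout(self.child_arc_feedforward(encoded_text))
attended_arcs = self.arc_attention(head_arc_representation, child_arc_representation)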
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, num_highway_layers: int, phrase_layer: Seq2SeqEncoder, similarity_function: SimilarityFunction, modeling_layer: Seq2SeqEncoder, span_end_encoder: Seq2SeqEncoder, dropout: float = 0.2, mask_lstms: bool = True, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, ) -> None: super().__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._highway_layer = TimeDistributed( Highway(text_field_embedder.get_output_dim(), num_highway_layers)) self._phrase_layer = phrase_layer self._matrix_attention = LegacyMatrixAttention(similarity_function) self._modeling_layer = modeling_layer self._span_end_encoder = span_end_encoder encoding_dim = phrase_layer.get_output_dim() modeling_dim = modeling_layer.get_output_dim() span_start_input_dim = encoding_dim * 4 + modeling_dim self._span_start_predictor = TimeDistributed( torch.nn.Linear(span_start_input_dim, 1)) span_end_encoding_dim = span_end_encoder.get_output_dim() span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim self._span_end_predictor = TimeDistributed( torch.nn.Linear(span_end_input_dim, 1)) # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily # obvious from the configuration files, so we check here. check_dimensions_match( modeling_layer.get_input_dim(), 4 * encoding_dim, "modeling layer input dim", "4 * encoding dim", ) check_dimensions_match( text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(), "text field embedder output dim", "phrase layer input dim", ) check_dimensions_match( span_end_encoder.get_input_dim(), 4 * encoding_dim + 3 * modeling_dim, "span end encoder input dim", "4 * encoding dim + 3 * modeling dim", ) self._accuracy = BooleanAccuracy() if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._mask_lstms = mask_lstms initializer(self)
def __init__( self, vocab: Vocabulary, source_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, vecoder: Seq2VecEncoder, sen_encoder: Seq2VecEncoder, max_decoding_steps: int = 64, attention: Attention = None, beam_size: int = None, target_namespace: str = "tokens", scheduled_sampling_ratio: float = 0.4, ) -> None: super().__init__(vocab) self._target_namespace = target_namespace self._scheduled_sampling_ratio = scheduled_sampling_ratio self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace) self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace) self.pad_index = self.vocab.get_token_index(self.vocab._padding_token, self._target_namespace) self._max_decoding_steps = max_decoding_steps self.vocab = vocab # anything about dims self.sen_num = 10 # with open('cy/openkg.pk', 'rb') as f: with open('fd/openkg.pk', 'rb') as f: self.symp_mat = torch.tensor(pickle.load(f)).float() # self.symp_mat = torch.nn.Parameter(self.kg_mat).cuda() # self.evovl_mat = torch.zeros(len(self.kg_mat), len(self.kg_mat)).cuda() with open('fd/idx2word.pk', 'rb') as f: self.word_idx = pickle.load(f) self.idx_word = {v: k for k, v in self.word_idx.items()} self.vocab_to_idx = {} self.idx_to_vocab_list = [] self.vocab_list = [] for k, word in self.word_idx.items(): self.vocab_to_idx[vocab.get_token_index(word.strip())] = k self.vocab_list.append(self.vocab_to_idx[vocab.get_token_index( word.strip())]) self.symp_size = len(self.symp_mat) + self.sen_num self.topic = len(self.symp_mat) self._encoder = encoder self._vecoder = vecoder self._sen_encoder = sen_encoder self.outfeature = self._sen_encoder.get_output_dim() # anything about graph self.symp_state = torch.nn.Parameter( torch.Tensor(self.symp_size, self.outfeature)) #.cuda() torch.nn.init.xavier_uniform_(self.symp_state, gain=1.414) self.predict_layer = torch.nn.Parameter( torch.Tensor(self.symp_size, self.outfeature)) self.predict_bias = torch.nn.Parameter(torch.Tensor(self.symp_size)) torch.nn.init.kaiming_uniform_(self.predict_layer) torch.nn.init.uniform_(self.predict_bias, -1 / self.symp_size**0.5, 1 / self.symp_size**0.5) self.attn_one = GATAttention(self.outfeature, self.outfeature, 1) self.attn_two = GATAttention(self.outfeature, self.outfeature, 1) self.attn_three = GATAttention(self.outfeature, self.outfeature, 1) # Metric self.kd_metric = KD_Metric() self.bleu_aver = NLTK_BLEU(ngram_weights=(0.25, 0.25, 0.25, 0.25)) self.bleu1 = NLTK_BLEU(ngram_weights=(1, 0, 0, 0)) self.bleu2 = NLTK_BLEU(ngram_weights=(0, 1, 0, 0)) self.bleu4 = NLTK_BLEU(ngram_weights=(0, 0, 0, 1)) self.topic_acc = Average() # anything about module self._source_embedder = source_embedder num_classes = self.vocab.get_vocab_size(self._target_namespace) target_embedding_dim = source_embedder.get_output_dim() self._target_embedder = Embedding(num_classes, target_embedding_dim) self._encoder_output_dim = self._encoder.get_output_dim( ) # 600; maybe the first two dims should just be replaced with outfeature as well self._decoder_output_dim = self._encoder_output_dim self._decoder_input_dim = target_embedding_dim self._attention = None if attention: self._attention = attention self._decoder_input_dim = self._decoder_output_dim + target_embedding_dim # maybe try fusing that embedding in here?
self.before_linear = Linear(2 * self.outfeature, self.outfeature) self._decoder_cell = LSTMCell( self._decoder_input_dim + self.outfeature, self._decoder_output_dim) self._output_projection_layer = Linear(self.outfeature, num_classes) self.linear_all = Linear(self.outfeature * 3 + self._decoder_input_dim, 1) self.attention_linear = Linear(self.outfeature, self.outfeature) self.decoder_linear = Linear(self.outfeature, self.outfeature) self.get_attn = Linear(self.outfeature, 1, bias=False) self.topic_acc = MyAverage() self.topic_rec = MyAverage() self.topic_f1 = F1() self.dink1 = Distinct1() self.dink2 = Distinct2() self.clac_num = 0
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, similarity_function: SimilarityFunction, projection_feedforward: FeedForward, inference_encoder: Seq2SeqEncoder, output_feedforward: FeedForward, output_logit: FeedForward, dropout: float = 0.5, class_weights: list = [], initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, encode_together: bool = False, ) -> None: super().__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._encoder = encoder self.encode_together = encode_together self._matrix_attention = LegacyMatrixAttention(similarity_function) self._projection_feedforward = projection_feedforward self._inference_encoder = inference_encoder if dropout: self.dropout = torch.nn.Dropout(dropout) self.rnn_input_dropout = InputVariationalDropout(dropout) else: self.dropout = None self.rnn_input_dropout = None self._output_feedforward = output_feedforward self._output_logit = output_logit self._num_labels = vocab.get_vocab_size(namespace="labels") if class_weights: self.class_weights = class_weights else: self.class_weights = [1.] * self._num_labels check_dimensions_match( text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim", ) check_dimensions_match( encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(), "encoder output dim", "projection feedforward input", ) check_dimensions_match( projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(), "proj feedforward output dim", "inference lstm input dim", ) self.metrics = {"accuracy": CategoricalAccuracy()} for _class in range(len(self.class_weights)): self.metrics.update({ f"f1_rel{_class}": F1Measure(_class), }) self._loss = torch.nn.CrossEntropyLoss(weight=torch.FloatTensor(self.class_weights)) initializer(self)
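# A short usage sketch for the weighted loss configured above: the per-class
# weights rebalance the cross-entropy over relation labels.
# `fused_representation` (a pooled output of the inference encoder) and
# `gold_labels` are assumed tensors; only the shapes matter here.
label_logits = self._output_logit(self._output_feedforward(fused_representation))  # (batch, num_labels)
loss = self._loss(label_logits, gold_labels.long().view(-1))
self.metrics['accuracy'](label_logits, gold_labels)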
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, context_layer: Seq2SeqEncoder, mention_feedforward: FeedForward, antecedent_feedforward: FeedForward, feature_size: int, max_span_width: int, spans_per_word: float, max_antecedents: int, coarse_to_fine: bool = False, inference_order: int = 1, lexical_dropout: float = 0.2, initializer: InitializerApplicator = InitializerApplicator(), **kwargs) -> None: super().__init__(vocab, **kwargs) self._text_field_embedder = text_field_embedder self._context_layer = context_layer self._mention_feedforward = TimeDistributed(mention_feedforward) self._mention_scorer = TimeDistributed( torch.nn.Linear(mention_feedforward.get_output_dim(), 1)) self._antecedent_feedforward = TimeDistributed(antecedent_feedforward) self._antecedent_scorer = TimeDistributed( torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1)) self._endpoint_span_extractor = EndpointSpanExtractor( context_layer.get_output_dim(), combination="x,y", num_width_embeddings=max_span_width, span_width_embedding_dim=feature_size, bucket_widths=False, ) self._attentive_span_extractor = SelfAttentiveSpanExtractor( input_dim=text_field_embedder.get_output_dim()) # 10 possible distance buckets. self._num_distance_buckets = 10 self._distance_embedding = Embedding( embedding_dim=feature_size, num_embeddings=self._num_distance_buckets) self._max_span_width = max_span_width self._spans_per_word = spans_per_word self._max_antecedents = max_antecedents self._coarse_to_fine = coarse_to_fine if self._coarse_to_fine: self._coarse2fine_scorer = torch.nn.Linear( mention_feedforward.get_input_dim(), mention_feedforward.get_input_dim()) self._inference_order = inference_order if self._inference_order > 1: self._span_updating_gated_sum = GatedSum( mention_feedforward.get_input_dim()) self._mention_recall = MentionRecall() self._conll_coref_scores = ConllCorefScores() if lexical_dropout > 0: self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout) else: self._lexical_dropout = lambda x: x initializer(self)
def __init__( self, vocab: Vocabulary, text_embedder: TextFieldEmbedder, definition_encoder: Seq2SeqEncoder, definition_decoder: FeedForward, definition_feedforward: FeedForward = None, definition_pooling: str = 'last', definition_namespace: str = 'definition', word_namespace: str = 'word', alpha: float = 1.0, beta: float = 8.0, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, ) -> None: super().__init__(vocab, regularizer) self.definition_namespace = definition_namespace self.word_namespace = word_namespace self.definition_vocab_size = self.vocab.get_vocab_size( namespace=self.definition_namespace) self._oov_index = self.vocab.get_token_index(self.vocab._oov_token, self.definition_namespace) self.limited_word_vocab_size = None self.alpha = alpha self.beta = beta self.eps = 10e-8 logger.info( f'Definition vocab size: {self.vocab.get_vocab_size(namespace=self.definition_namespace)}' ) logger.info( f'Word vocab size: {self.vocab.get_vocab_size(namespace=self.word_namespace)}' ) logger.info('Intersection vocab size: {}'.format( len( set(self.vocab._token_to_index[ self.definition_namespace].keys()).intersection( set(self.vocab._token_to_index[ self.word_namespace].keys()))))) # TODO: check text_embedder self.text_embedder = text_embedder self.definition_encoder = definition_encoder self.definition_decoder = definition_decoder self.definition_pooling = definition_pooling if definition_feedforward is not None: self.definition_feedforward = definition_feedforward else: self.definition_feedforward = lambda x: x if self.definition_pooling == 'self-attentive': self.self_attentive_pooling_projection = nn.Linear( self.definition_encoder.get_output_dim(), 1) # checks check_dimensions_match(text_embedder.get_output_dim(), definition_encoder.get_input_dim(), 'emb_dim', 'encoder_input_dim') if self.definition_decoder.get_output_dim( ) > self.vocab.get_vocab_size(definition_namespace): raise ConfigurationError( f'Decoder output dim ({self.definition_decoder.get_output_dim()}) is larger than ' f'vocabulary size ({self.vocab.get_vocab_size(definition_namespace)}).' ) if self.definition_decoder.get_output_dim( ) < self.vocab.get_vocab_size(definition_namespace): self.limited_word_vocab_size = self.definition_decoder.get_output_dim( ) # self.pdist = nn.PairwiseDistance(p=2) self.pdist = lambda x, y: torch.mean((x - y)**2, dim=1) self.metrics = {'consistency_loss': EuclideanDistance()} initializer(self)
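# A minimal sketch of the consistency term the `pdist` lambda above computes:
# a per-example mean squared distance between a word embedding and its pooled
# definition encoding, scaled by `alpha`. `word_embedding` and
# `definition_encoding` are assumed (batch, embedding_dim) tensors, and the
# EuclideanDistance metric is assumed to accept a scalar like AllenNLP's Average.
consistency = self.pdist(word_embedding, definition_encoding)  # (batch,)
consistency_loss = self.alpha * consistency.mean()
self.metrics['consistency_loss'](consistency_loss.item())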
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, num_highway_layers: int, phrase_layer: Seq2SeqEncoder, attention_similarity_function: SimilarityFunction, modeling_layer: Seq2SeqEncoder, span_end_encoder: Seq2SeqEncoder, dropout: float = 0.2, mask_lstms: bool = True, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._highway_layer = TimeDistributed( Highway(text_field_embedder.get_output_dim(), num_highway_layers)) self._phrase_layer = phrase_layer self._matrix_attention = MatrixAttention(attention_similarity_function) self._modeling_layer = modeling_layer self._span_end_encoder = span_end_encoder encoding_dim = phrase_layer.get_output_dim() modeling_dim = modeling_layer.get_output_dim() span_start_input_dim = encoding_dim * 4 + modeling_dim self._span_start_predictor = TimeDistributed( torch.nn.Linear(span_start_input_dim, 1)) span_end_encoding_dim = span_end_encoder.get_output_dim() span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim self._span_end_predictor = TimeDistributed( torch.nn.Linear(span_end_input_dim, 1)) # Bidaf has lots of layer dimensions which need to match up - these # aren't necessarily obvious from the configuration files, so we check # here. if modeling_layer.get_input_dim() != 4 * encoding_dim: raise ConfigurationError( "The input dimension to the modeling_layer must be " "equal to 4 times the encoding dimension of the phrase_layer. " "Found {} and 4 * {} respectively.".format( modeling_layer.get_input_dim(), encoding_dim)) if text_field_embedder.get_output_dim() != phrase_layer.get_input_dim( ): raise ConfigurationError( "The output dimension of the text_field_embedder (embedding_dim + " "char_cnn) must match the input dimension of the phrase_encoder. " "Found {} and {}, respectively.".format( text_field_embedder.get_output_dim(), phrase_layer.get_input_dim())) if span_end_encoder.get_input_dim( ) != encoding_dim * 4 + modeling_dim * 3: raise ConfigurationError( "The input dimension of the span_end_encoder should be equal to " "4 * phrase_layer.output_dim + 3 * modeling_layer.output_dim. " "Found {} and (4 * {} + 3 * {}) " "respectively.".format(span_end_encoder.get_input_dim(), encoding_dim, modeling_dim)) self._span_start_accuracy = CategoricalAccuracy() self._span_end_accuracy = CategoricalAccuracy() self._span_accuracy = BooleanAccuracy() self._squad_metrics = SquadEmAndF1() if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._mask_lstms = mask_lstms initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, intent_encoder: Seq2SeqEncoder = None, tag_encoder: Seq2SeqEncoder = None, attention: Attention = None, attention_function: SimilarityFunction = None, context_for_intent: bool = True, context_for_tag: bool = True, attention_for_intent: bool = True, attention_for_tag: bool = True, sequence_label_namespace: str = "labels", intent_label_namespace: str = "intent_labels", feedforward: Optional[FeedForward] = None, label_encoding: Optional[str] = None, include_start_end_transitions: bool = True, crf_decoding: bool = False, constrain_crf_decoding: bool = None, focal_loss_gamma: float = None, nongeneral_intent_weight: float = 5., num_train_examples: float = None, calculate_span_f1: bool = None, dropout: Optional[float] = None, verbose_metrics: bool = False, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.context_for_intent = context_for_intent self.context_for_tag = context_for_tag self.attention_for_intent = attention_for_intent self.attention_for_tag = attention_for_tag self.sequence_label_namespace = sequence_label_namespace self.intent_label_namespace = intent_label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(sequence_label_namespace) self.num_intents = self.vocab.get_vocab_size(intent_label_namespace) self.encoder = encoder self.intent_encoder = intent_encoder self.tag_encoder = tag_encoder self._feedforward = feedforward self._verbose_metrics = verbose_metrics self.rl = False if attention: if attention_function: raise ConfigurationError("You can only specify an attention module or an " "attention function, but not both.") self.attention = attention elif attention_function: self.attention = LegacyAttention(attention_function) if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None projection_input_dim = feedforward.get_output_dim() if self._feedforward else self.encoder.get_output_dim() if self.context_for_intent: projection_input_dim += self.encoder.get_output_dim() if self.attention_for_intent: projection_input_dim += self.encoder.get_output_dim() self.intent_projection_layer = Linear(projection_input_dim, self.num_intents) if num_train_examples: try: pos_weight = torch.tensor([log10((num_train_examples - self.vocab._retained_counter[intent_label_namespace][t]) / self.vocab._retained_counter[intent_label_namespace][t]) for i, t in self.vocab.get_index_to_token_vocabulary(intent_label_namespace).items()]) except Exception: pos_weight = torch.tensor([1. for i, t in self.vocab.get_index_to_token_vocabulary(intent_label_namespace).items()]) else: # pos_weight = torch.tensor([(lambda t: 1.
if "general" in t else nongeneral_intent_weight)(t) for i, t in pos_weight = torch.tensor([(lambda t: nongeneral_intent_weight if "Request" in t else 1.)(t) for i, t in self.vocab.get_index_to_token_vocabulary(intent_label_namespace).items()]) self.intent_loss = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight, reduction="none") tag_projection_input_dim = feedforward.get_output_dim() if self._feedforward else self.encoder.get_output_dim() if self.context_for_tag: tag_projection_input_dim += self.encoder.get_output_dim() if self.attention_for_tag: tag_projection_input_dim += self.encoder.get_output_dim() self.tag_projection_layer = TimeDistributed(Linear(tag_projection_input_dim, self.num_tags)) # if constrain_crf_decoding and calculate_span_f1 are not # provided, (i.e., they're None), set them to True # if label_encoding is provided and False if it isn't. if constrain_crf_decoding is None: constrain_crf_decoding = label_encoding is not None if calculate_span_f1 is None: calculate_span_f1 = label_encoding is not None self.label_encoding = label_encoding if constrain_crf_decoding: if not label_encoding: raise ConfigurationError("constrain_crf_decoding is True, but " "no label_encoding was specified.") labels = self.vocab.get_index_to_token_vocabulary(sequence_label_namespace) constraints = allowed_transitions(label_encoding, labels) else: constraints = None self.include_start_end_transitions = include_start_end_transitions if crf_decoding: self.crf = ConditionalRandomField( self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions ) else: self.crf = None self._intent_f1_metric = MultiLabelF1Measure(vocab, namespace=intent_label_namespace) self.calculate_span_f1 = calculate_span_f1 if calculate_span_f1: if not label_encoding: raise ConfigurationError("calculate_span_f1 is True, but " "no label_encoding was specified.") self._f1_metric = SpanBasedF1Measure(vocab, tag_namespace=sequence_label_namespace, label_encoding=label_encoding) self._dai_f1_metric = DialogActItemF1Measure() check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") if feedforward is not None: check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(), "encoder output dim", "feedforward input dim") initializer(self)
def __init__( self, vocab: Vocabulary, question_embedder: TextFieldEmbedder, action_embedding_dim: int, encoder: Seq2SeqEncoder, decoder_beam_search: BeamSearch, max_decoding_steps: int, attention: Attention, mixture_feedforward: FeedForward = None, add_action_bias: bool = True, dropout: float = 0.0, num_linking_features: int = 0, num_entity_bits: int = 0, entity_bits_output: bool = True, use_entities: bool = False, denotation_only: bool = False, # Deprecated parameter to load older models entity_encoder: Seq2VecEncoder = None, entity_similarity_mode: str = "dot_product", rule_namespace: str = "rule_labels", ) -> None: super(QuarelSemanticParser, self).__init__(vocab) self._question_embedder = question_embedder self._encoder = encoder self._beam_search = decoder_beam_search self._max_decoding_steps = max_decoding_steps if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._rule_namespace = rule_namespace self._denotation_accuracy = Average() self._action_sequence_accuracy = Average() self._has_logical_form = Average() self._embedding_dim = question_embedder.get_output_dim() self._use_entities = use_entities # Note: there's only one non-trivial entity type in QuaRel for now, so most of the # entity_type stuff is irrelevant self._num_entity_types = 4 # TODO(mattg): get this in a more principled way somehow? self._entity_type_encoder_embedding = Embedding( self._num_entity_types, self._embedding_dim) self._entity_type_decoder_embedding = Embedding( self._num_entity_types, action_embedding_dim) self._entity_similarity_layer = None self._entity_similarity_mode = entity_similarity_mode if self._entity_similarity_mode == "weighted_dot_product": self._entity_similarity_layer = TimeDistributed( torch.nn.Linear(self._embedding_dim, 1, bias=False)) # Center initial values around unweighted dot product self._entity_similarity_layer._module.weight.data += 1 elif self._entity_similarity_mode == "dot_product": pass else: raise ValueError("Invalid entity_similarity_mode: {}".format( self._entity_similarity_mode)) if num_linking_features > 0: self._linking_params = torch.nn.Linear(num_linking_features, 1) else: self._linking_params = None self._decoder_trainer = MaximumMarginalLikelihood() self._encoder_output_dim = self._encoder.get_output_dim() if entity_bits_output: self._encoder_output_dim += num_entity_bits self._entity_bits_output = entity_bits_output self._debug_count = 10 self._num_denotation_cats = 2 # Hardcoded for simplicity self._denotation_only = denotation_only if self._denotation_only: self._denotation_accuracy_cat = CategoricalAccuracy() self._denotation_classifier = torch.nn.Linear( self._encoder_output_dim, self._num_denotation_cats) # Rest of init not needed for denotation only where no decoding to actions needed return self._action_padding_index = -1 # the padding value used by IndexField num_actions = vocab.get_vocab_size(self._rule_namespace) self._num_actions = num_actions self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) # We are tying the action embeddings used for input and output # self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) self._output_action_embedder = self._action_embedder # tied weights self._add_action_bias = add_action_bias if self._add_action_bias: self._action_biases = Embedding(num_embeddings=num_actions, embedding_dim=1) # This is what we pass as input in the first step of decoding, when we don't have a # previous 
action, or a previous question attention. self._first_action_embedding = torch.nn.Parameter( torch.FloatTensor(action_embedding_dim)) self._first_attended_question = torch.nn.Parameter( torch.FloatTensor(self._encoder_output_dim)) torch.nn.init.normal_(self._first_action_embedding) torch.nn.init.normal_(self._first_attended_question) self._decoder_step = LinkingTransitionFunction( encoder_output_dim=self._encoder_output_dim, action_embedding_dim=action_embedding_dim, input_attention=attention, add_action_bias=self._add_action_bias, mixture_feedforward=mixture_feedforward, dropout=dropout, )
def __init__( self, vocab: Vocabulary, token_embedder: TextFieldEmbedder, entity_embedder: TextFieldEmbedder, alias_encoder: Seq2SeqEncoder, hidden_size: int, num_layers: int, dropout: float = 0.4, dropouth: float = 0.3, dropouti: float = 0.65, dropoute: float = 0.1, wdrop: float = 0.5, alpha: float = 2.0, beta: float = 1.0, tie_weights: bool = False, initializer: InitializerApplicator = InitializerApplicator() ) -> None: super(AliasCopynet, self).__init__(vocab) # Model architecture - Note: we need to extract the `Embedding` layers from the # `TokenEmbedders` to apply dropout later on. # pylint: disable=protected-access self._token_embedder = token_embedder._token_embedders['tokens'] self._entity_embedder = entity_embedder._token_embedders['entity_ids'] self._alias_encoder = alias_encoder self._hidden_size = hidden_size self._num_layers = num_layers self._tie_weights = tie_weights # Dropout self._locked_dropout = LockedDropout() self._dropout = dropout self._dropouth = dropouth self._dropouti = dropouti self._dropoute = dropoute self._wdrop = wdrop # Regularization strength self._alpha = alpha self._beta = beta # RNN Encoders. TODO: Experiment with separate encoder for aliases. entity_embedding_dim = entity_embedder.get_output_dim() token_embedding_dim = token_embedder.get_output_dim() assert entity_embedding_dim == token_embedding_dim embedding_dim = token_embedding_dim rnns: List[torch.nn.Module] = [] for i in range(num_layers): if i == 0: input_size = token_embedding_dim else: input_size = hidden_size if (i == num_layers - 1) and tie_weights: output_size = token_embedding_dim else: output_size = hidden_size rnns.append( torch.nn.LSTM(input_size, output_size, batch_first=True)) rnns = [ WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in rnns ] self.rnns = torch.nn.ModuleList(rnns) # Various linear transformations. self._fc_mention = torch.nn.Linear(in_features=embedding_dim, out_features=2) self._fc_entity = torch.nn.Linear(in_features=embedding_dim, out_features=embedding_dim) self._fc_condense = torch.nn.Linear(in_features=2 * embedding_dim, out_features=embedding_dim) self._fc_generate = torch.nn.Linear( in_features=embedding_dim, out_features=vocab.get_vocab_size('tokens')) self._fc_copy = torch.nn.Linear(in_features=embedding_dim, out_features=embedding_dim) if tie_weights: self._fc_generate.weight = self._token_embedder.weight self._state: Optional[Dict[str, Any]] = None # Metrics # self._avg_mention_loss = Average() # self._avg_entity_loss = Average() # self._avg_vocab_loss = Average() self._unk_index = vocab.get_token_index(DEFAULT_OOV_TOKEN) self._unk_penalty = math.log(vocab.get_vocab_size('tokens_unk')) self._ppl = Ppl() self._upp = Ppl() self._kg_ppl = Ppl() # Knowledge-graph ppl self._bg_ppl = Ppl() # Background ppl initializer(self)
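# A small illustrative check of the invariant behind `tie_weights` above:
# sharing self._fc_generate.weight with the token embedding requires the final
# LSTM layer to emit vectors of the embedding size, which is why the loop sets
# output_size = token_embedding_dim on the last layer when tying.
if tie_weights:
    assert self._fc_generate.weight.shape == self._token_embedder.weight.shape, \
        'Tied generation layer must match the token embedding matrix.'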
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, phrase_layer: Seq2SeqEncoder, residual_encoder: Seq2SeqEncoder, span_start_encoder: Seq2SeqEncoder, span_end_encoder: Seq2SeqEncoder, initializer: InitializerApplicator, dropout: float = 0.2, num_context_answers: int = 0, marker_embedding_dim: int = 10, max_span_length: int = 30) -> None: super().__init__(vocab) self._num_context_answers = num_context_answers self._max_span_length = max_span_length self._text_field_embedder = text_field_embedder self._phrase_layer = phrase_layer self._marker_embedding_dim = marker_embedding_dim self._encoding_dim = phrase_layer.get_output_dim() max_turn_length = 12 self._matrix_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y') self._merge_atten = TimeDistributed( torch.nn.Linear(self._encoding_dim * 4, self._encoding_dim)) self.t = TimeDistributed( torch.nn.Linear(self._encoding_dim * 2, self._encoding_dim)) self._residual_encoder = residual_encoder if num_context_answers > 0: self._question_num_marker = torch.nn.Embedding( max_turn_length, marker_embedding_dim * num_context_answers) self._prev_ans_marker = torch.nn.Embedding( (num_context_answers * 4) + 1, marker_embedding_dim) self._self_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y') self._followup_lin = torch.nn.Linear(self._encoding_dim, 3) self._merge_self_attention = TimeDistributed( torch.nn.Linear(self._encoding_dim * 3, self._encoding_dim)) self._span_start_encoder = span_start_encoder self._span_end_encoder = span_end_encoder self._span_start_predictor = TimeDistributed( torch.nn.Linear(self._encoding_dim, 1)) self._span_end_predictor = TimeDistributed( torch.nn.Linear(self._encoding_dim, 1)) self._span_yesno_predictor = TimeDistributed( torch.nn.Linear(self._encoding_dim, 3)) self._span_followup_predictor = TimeDistributed(self._followup_lin) check_dimensions_match( phrase_layer.get_input_dim(), text_field_embedder.get_output_dim() + marker_embedding_dim * num_context_answers, "phrase layer input dim", "embedding dim + marker dim * num context answers") initializer(self) self._span_start_accuracy = CategoricalAccuracy() self._span_end_accuracy = CategoricalAccuracy() self._span_yesno_accuracy = CategoricalAccuracy() self._span_followup_accuracy = CategoricalAccuracy() self._span_gt_yesno_accuracy = CategoricalAccuracy() self._span_gt_followup_accuracy = CategoricalAccuracy() self._span_accuracy = BooleanAccuracy() self._official_f1 = Average() self._variational_dropout = InputVariationalDropout(dropout)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, tag_representation_dim: int, arc_representation_dim: int, tag_feedforward: FeedForward = None, arc_feedforward: FeedForward = None, pos_tag_embedding: Embedding = None, dropout: float = 0.0, input_dropout: float = 0.0, edge_prediction_threshold: float = 0.5, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(GraphParser, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.encoder = encoder self.edge_prediction_threshold = edge_prediction_threshold if not 0 < edge_prediction_threshold < 1: raise ConfigurationError(f"edge_prediction_threshold must be between " f"0 and 1 (exclusive) but found {edge_prediction_threshold}.") encoder_dim = encoder.get_output_dim() self.head_arc_feedforward = arc_feedforward or \ FeedForward(encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")()) self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward) self.arc_attention = BilinearMatrixAttention(arc_representation_dim, arc_representation_dim, use_input_biases=True) num_labels = self.vocab.get_vocab_size("labels") self.head_tag_feedforward = tag_feedforward or \ FeedForward(encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")()) self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward) self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim, tag_representation_dim, label_dim=num_labels) self._pos_tag_embedding = pos_tag_embedding or None self._dropout = InputVariationalDropout(dropout) self._input_dropout = Dropout(input_dropout) representation_dim = text_field_embedder.get_output_dim() if pos_tag_embedding is not None: representation_dim += pos_tag_embedding.get_output_dim() check_dimensions_match(representation_dim, encoder.get_input_dim(), "text field embedding dim", "encoder input dim") check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(), "tag representation dim", "tag feedforward output dim") check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(), "arc representation dim", "arc feedforward output dim") self._unlabelled_f1 = F1Measure(positive_label=1) self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction='none') self._tag_loss = torch.nn.CrossEntropyLoss(reduction='none') initializer(self)
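# A hedged sketch of how `edge_prediction_threshold` is used at decode time:
# arc scores from self.arc_attention are squashed with a sigmoid and edges
# above the threshold are kept. `attended_arcs` is an assumed
# (batch, seq_len, seq_len) score matrix.
edge_probs = torch.sigmoid(attended_arcs)
predicted_edges = (edge_probs >= self.edge_prediction_threshold).long()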
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             text_encoder: Seq2SeqEncoder,
             target_encoder: Seq2VecEncoder,
             feedforward: Optional[FeedForward] = None,
             target_field_embedder: Optional[TextFieldEmbedder] = None,
             attention_activation_function: Optional[str] = 'tanh',
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             word_dropout: float = 0.0,
             dropout: float = 0.0) -> None:
    '''
    This attention target classifier is based on the model in
    `Exploiting Document Knowledge for Aspect-level Sentiment Classification
    <https://aclanthology.info/papers/P18-2092/p18-2092>`_ (Ruidan He et al., 2018),
    where the attention over the encoded context words is based on the encoded
    target vector.

    :param vocab: A Vocabulary, required in order to compute sizes for
                  input/output projections.
    :param text_field_embedder: Used to embed the text, and also the target
                                text when target_field_embedder is None.
    :param text_encoder: Sequence encoder that creates the representation of
                         each token in the context sentence.
    :param target_encoder: Encoder that creates the representation of the
                           target text tokens.
    :param feedforward: An optional feed forward layer. If the target encoder
                        is None it is applied to the output of the text
                        encoder; otherwise it is applied after the target and
                        text encoded representations have been concatenated.
    :param target_field_embedder: Used to embed the target text given as input
                                  to the target_encoder, allowing a separate
                                  embedding for text and target text.
    :param attention_activation_function: The name of the activation function
                                          applied after the ``h^T W t + b``
                                          calculation. Activation names can be found
                                          `here <https://allenai.github.io/allennlp-docs/api/allennlp.nn.activations.html>`_.
                                          Default is tanh.
    :param initializer: Used to initialize the model parameters.
    :param regularizer: If provided, will be used to calculate the
                        regularization penalty during training.
    :param word_dropout: Dropout applied after the embedding of the
                         tokens/words; it drops entire words with this
                         probability.
    :param dropout: Dropout applied after each layer apart from the last.
                    All dropout applied to time-based data is
                    `variational dropout`_; everything else is standard dropout.

    .. _variational dropout:
       https://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks.pdf
    '''
    super().__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.target_field_embedder = target_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.text_encoder = text_encoder
    self.target_encoder = target_encoder
    self.feedforward = feedforward

    attention_activation = Activation.by_name(attention_activation_function)()
    self.attention_layer = BilinearAttention(self.target_encoder.get_output_dim(),
                                             self.text_encoder.get_output_dim(),
                                             attention_activation,
                                             normalize=True)

    if feedforward is not None:
        output_dim = self.feedforward.get_output_dim()
    else:
        output_dim = self.text_encoder.get_output_dim()
    self.label_projection = Linear(output_dim, self.num_classes)

    self.metrics = {"accuracy": CategoricalAccuracy()}
    # F1 scores, one per label.
    self.f1_metrics = {}
    label_index_name = self.vocab.get_index_to_token_vocabulary('labels')
    for label_index, label_name in label_index_name.items():
        label_name = f'F1_{label_name.capitalize()}'
        self.f1_metrics[label_name] = F1Measure(label_index)

    self._word_dropout = WordDropout(word_dropout)
    self._variational_dropout = InputVariationalDropout(dropout)
    self._naive_dropout = Dropout(dropout)

    self.loss = torch.nn.CrossEntropyLoss()

    # Ensure that the dimensions of the text field embedder and text encoder match.
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           text_encoder.get_input_dim(),
                           "text field embedding dim", "text encoder input dim")
    # Ensure that the dimensions of the target (or text) field embedder and
    # the target encoder match.
    target_field_embedder_dim = text_field_embedder.get_output_dim()
    target_field_error = "text field embedding dim"
    if self.target_field_embedder:
        target_field_embedder_dim = target_field_embedder.get_output_dim()
        target_field_error = "target field embedding dim"
    check_dimensions_match(target_field_embedder_dim,
                           target_encoder.get_input_dim(),
                           target_field_error, "target encoder input dim")
    initializer(self)
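
# Illustrative sketch (not part of the model): the ``h^T W t + b`` bilinear
# attention that the constructor above builds with BilinearAttention, in plain
# PyTorch. The encoded target vector t scores every encoded context token h_i,
# tanh is applied as the activation, and the normalized weights pool the text
# into a single target-aware vector. All sizes are made up for the example.
import torch

batch_size, seq_length, text_dim, target_dim = 2, 12, 50, 40

W = torch.nn.Parameter(torch.randn(target_dim, text_dim))
b = torch.nn.Parameter(torch.zeros(1))

encoded_text = torch.randn(batch_size, seq_length, text_dim)  # h_1 ... h_n
target_vector = torch.randn(batch_size, target_dim)           # t

# scores[b, i] = tanh(h_i^T W t + b)
scores = torch.tanh(
    torch.einsum('bd,de,bse->bs', target_vector, W, encoded_text) + b)
weights = torch.softmax(scores, dim=-1)                       # normalize=True
weighted_text = torch.einsum('bs,bse->be', weights, encoded_text)
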
def __init__(self,
             vocab: Vocabulary,
             source_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             attention: Attention,
             max_decoding_steps: int,
             beam_size: int = None,
             target_namespace: str = "target_tokens",
             target_embedding_dim: int = None,
             scheduled_sampling_ratio: float = 0.,
             projection_dim: int = None,
             use_coverage: bool = False,
             coverage_shift: float = 0.,
             coverage_loss_weight: float = None,
             embed_attn_to_output: bool = False) -> None:
    super(PointerGeneratorNetwork, self).__init__(vocab)

    self._target_namespace = target_namespace
    self._start_index = self.vocab.get_token_index(START_SYMBOL, target_namespace)
    self._end_index = self.vocab.get_token_index(END_SYMBOL, target_namespace)
    self._unk_index = self.vocab.get_token_index(DEFAULT_OOV_TOKEN, target_namespace)
    self._vocab_size = self.vocab.get_vocab_size(target_namespace)
    assert self._vocab_size > 2, \
        "Target vocabulary is empty. Make sure 'target_namespace' option of the model is correct."

    # Encoder
    self._source_embedder = source_embedder
    self._encoder = encoder
    self._encoder_output_dim = self._encoder.get_output_dim()

    # Decoder
    self._target_embedding_dim = target_embedding_dim or source_embedder.get_output_dim()
    self._num_classes = self.vocab.get_vocab_size(target_namespace)
    self._target_embedder = Embedding(num_embeddings=self._num_classes,
                                      embedding_dim=self._target_embedding_dim)
    self._decoder_input_dim = self._encoder_output_dim + self._target_embedding_dim
    self._decoder_output_dim = self._encoder_output_dim
    self._decoder_cell = LSTMCell(self._decoder_input_dim, self._decoder_output_dim)
    self._projection_dim = projection_dim or self._source_embedder.get_output_dim()
    hidden_projection_dim = self._decoder_output_dim * 2 if embed_attn_to_output else self._decoder_output_dim
    self._hidden_projection_layer = Linear(hidden_projection_dim, self._projection_dim)
    self._output_projection_layer = Linear(self._projection_dim, self._num_classes)
    self._p_gen_layer = Linear(self._decoder_output_dim * 3 + self._decoder_input_dim, 1)
    self._attention = attention

    self._use_coverage = use_coverage
    self._coverage_loss_weight = coverage_loss_weight
    self._eps = 1e-31
    self._embed_attn_to_output = embed_attn_to_output
    self._coverage_shift = coverage_shift

    # Metrics
    self._p_gen_sum = 0.0
    self._p_gen_iterations = 0
    self._coverage_loss_sum = 0.0
    self._coverage_iterations = 0

    # Decoding
    self._scheduled_sampling_ratio = scheduled_sampling_ratio
    self._max_decoding_steps = max_decoding_steps
    self._beam_search = BeamSearch(self._end_index,
                                   max_steps=max_decoding_steps,
                                   beam_size=beam_size or 1)
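
# Illustrative sketch (not part of the model): the pointer-generator mixture
# that the _p_gen_layer above gates. The final distribution interpolates the
# vocabulary softmax with the copy (attention) distribution scattered onto the
# source token ids; the extended vocabulary for source OOVs is ignored here.
# All tensors are random stand-ins with made-up sizes.
import torch

batch_size, source_length, vocab_size = 2, 8, 20

p_gen = torch.sigmoid(torch.randn(batch_size, 1))        # output of _p_gen_layer
vocab_dist = torch.softmax(torch.randn(batch_size, vocab_size), dim=-1)
attention_dist = torch.softmax(torch.randn(batch_size, source_length), dim=-1)
source_token_ids = torch.randint(0, vocab_size, (batch_size, source_length))

# Accumulate attention mass onto the vocabulary ids of the source tokens.
copy_dist = torch.zeros(batch_size, vocab_size)
copy_dist.scatter_add_(1, source_token_ids, attention_dist)

final_dist = p_gen * vocab_dist + (1 - p_gen) * copy_dist
assert torch.allclose(final_dist.sum(-1), torch.ones(batch_size))
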
def __init__(self,
             vocab: Vocabulary,
             question_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             entity_encoder: Seq2VecEncoder,
             max_decoding_steps: int,
             add_action_bias: bool = True,
             use_neighbor_similarity_for_linking: bool = False,
             dropout: float = 0.0,
             num_linking_features: int = 10,
             rule_namespace: str = 'rule_labels',
             tables_directory: str = '/wikitables/') -> None:
    super(WikiTablesSemanticParser, self).__init__(vocab)
    self._question_embedder = question_embedder
    self._encoder = encoder
    self._entity_encoder = TimeDistributed(entity_encoder)
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace
    self._denotation_accuracy = WikiTablesAccuracy(tables_directory)
    self._action_sequence_accuracy = Average()
    self._has_logical_form = Average()

    self._action_padding_index = -1  # the padding value used by IndexField
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    if self._add_action_bias:
        self._action_biases = Embedding(num_embeddings=num_actions, embedding_dim=1)
    self._action_embedder = Embedding(num_embeddings=num_actions,
                                      embedding_dim=action_embedding_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions,
                                             embedding_dim=action_embedding_dim)

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous question attention.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    self._first_attended_question = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_question)

    check_dimensions_match(entity_encoder.get_output_dim(), question_embedder.get_output_dim(),
                           "entity word average embedding dim", "question embedding dim")

    self._num_entity_types = 4  # TODO(mattg): get this in a more principled way somehow?
    self._num_start_types = 5  # TODO(mattg): get this in a more principled way somehow?
    self._embedding_dim = question_embedder.get_output_dim()
    self._entity_type_encoder_embedding = Embedding(num_embeddings=self._num_entity_types,
                                                    embedding_dim=self._embedding_dim)
    self._entity_type_decoder_embedding = Embedding(num_embeddings=self._num_entity_types,
                                                    embedding_dim=action_embedding_dim)
    self._neighbor_params = torch.nn.Linear(self._embedding_dim, self._embedding_dim)

    if num_linking_features > 0:
        self._linking_params = torch.nn.Linear(num_linking_features, 1)
    else:
        self._linking_params = None

    if self._use_neighbor_similarity_for_linking:
        self._question_entity_params = torch.nn.Linear(1, 1)
        self._question_neighbor_params = torch.nn.Linear(1, 1)
    else:
        self._question_entity_params = None
        self._question_neighbor_params = None
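
# Illustrative sketch (not part of the model): how _linking_params above
# collapses per-pair linking features into a single linking score. Each
# (entity, question-token) pair carries num_linking_features hand-built
# features, and the linear layer reduces them to one logit per pair. The
# feature tensor here is a random stand-in with made-up sizes.
import torch

batch_size, num_entities, question_length, num_linking_features = 2, 6, 10, 10

linking_params = torch.nn.Linear(num_linking_features, 1)
linking_features = torch.randn(batch_size, num_entities, question_length,
                               num_linking_features)

linking_scores = linking_params(linking_features).squeeze(-1)  # (batch, entities, question)
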