def __init__(self,
             vocab: Vocabulary,
             mention_feedforward: FeedForward,
             relation_feedforward: FeedForward,
             feature_size: int,
             spans_per_word: float,
             span_emb_dim: int,
             rel_prop: int = 0,
             rel_prop_dropout_A: float = 0.0,
             rel_prop_dropout_f: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             positive_label_weight: float = 1.0,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(RelationExtractor, self).__init__(vocab, regularizer)

    # Need to hack this for cases where there's no relation data. It breaks Ulme's code.
    self._n_labels = max(vocab.get_vocab_size("relation_labels"), 1)

    # Span candidate scorer.
    # TODO(dwadden) make sure I've got the input dim right on this one.
    feedforward_scorer = torch.nn.Sequential(
        TimeDistributed(mention_feedforward),
        TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)))
    self._mention_pruner = Pruner(feedforward_scorer)

    # Relation scorer.
    self._relation_feedforward = relation_feedforward
    self._relation_scorer = torch.nn.Linear(relation_feedforward.get_output_dim(), self._n_labels)

    self._spans_per_word = spans_per_word

    # TODO(dwadden) Add code to compute relation F1.
    self._candidate_recall = CandidateRecall()
    self._relation_metrics = RelationMetrics()

    class_weights = torch.cat([torch.tensor([1.0]),
                               positive_label_weight * torch.ones(self._n_labels)])
    self._loss = torch.nn.CrossEntropyLoss(reduction="sum", ignore_index=-1, weight=class_weights)

    self.rel_prop = rel_prop

    # Relation Propagation
    self._A_network = FeedForward(input_dim=self._n_labels,
                                  num_layers=1,
                                  hidden_dims=span_emb_dim,
                                  activations=lambda x: x,
                                  dropout=rel_prop_dropout_A)
    self._f_network = FeedForward(input_dim=2 * span_emb_dim,
                                  num_layers=1,
                                  hidden_dims=span_emb_dim,
                                  activations=torch.nn.Sigmoid(),
                                  dropout=rel_prop_dropout_f)

    initializer(self)

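# All of the constructors in this file lean on AllenNLP's TimeDistributed wrapper to apply a
# per-item module across an extra time/span dimension. A minimal sketch of that reshaping
# behaviour (the tensor sizes below are illustrative assumptions, not taken from any model here):
import torch
from allennlp.modules import TimeDistributed

# Toy scorer: maps a 10-dim span embedding to a single score.
toy_scorer = TimeDistributed(torch.nn.Linear(10, 1))

# (batch=2, num_spans=5, emb=10): TimeDistributed folds the span dimension into the batch,
# applies the Linear, and unfolds back to (2, 5, 1).
span_embeddings = torch.randn(2, 5, 10)
span_scores = toy_scorer(span_embeddings)
print(span_scores.shape)  # torch.Size([2, 5, 1])
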
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BidafOriginal, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))

    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(),
                           "text field embedder output dim", "phrase layer input dim")
    check_dimensions_match(span_end_encoder.get_input_dim(), 4 * encoding_dim + 3 * modeling_dim,
                           "span end encoder input dim", "4 * encoding dim + 3 * modeling dim")

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    initializer(self)

def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    span_extractor: SpanExtractor,
    encoder: Seq2SeqEncoder,
    feedforward: FeedForward = None,
    pos_tag_embedding: Embedding = None,
    initializer: InitializerApplicator = InitializerApplicator(),
    evalb_directory_path: str = DEFAULT_EVALB_DIR,
    **kwargs,
) -> None:
    super().__init__(vocab, **kwargs)

    self.text_field_embedder = text_field_embedder
    self.span_extractor = span_extractor
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    self.feedforward_layer = TimeDistributed(feedforward) if feedforward else None
    self.pos_tag_embedding = pos_tag_embedding or None
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = span_extractor.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_classes))

    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(
        representation_dim,
        encoder.get_input_dim(),
        "representation dim (tokens + optional POS tags)",
        "encoder input dim",
    )
    check_dimensions_match(
        encoder.get_output_dim(),
        span_extractor.get_input_dim(),
        "encoder output dim",
        "span extractor input dim",
    )
    if feedforward is not None:
        check_dimensions_match(
            span_extractor.get_output_dim(),
            feedforward.get_input_dim(),
            "span extractor output dim",
            "feedforward input dim",
        )
    self.tag_accuracy = CategoricalAccuracy()
    if evalb_directory_path is not None:
        self._evalb_score = EvalbBracketingScorer(evalb_directory_path)
    else:
        self._evalb_score = None
    initializer(self)

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             residual_encoder: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator,
             dropout: float = 0.2,
             pair2vec_dropout: float = 0.15,
             max_span_length: int = 30,
             pair2vec_model_file: str = None,
             pair2vec_config_file: str = None) -> None:
    super().__init__(vocab)

    self._max_span_length = max_span_length
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer
    self._encoding_dim = phrase_layer.get_output_dim()

    self.pair2vec = pair2vec_util.get_pair2vec(pair2vec_config_file, pair2vec_model_file)
    self._pair2vec_dropout = torch.nn.Dropout(pair2vec_dropout)

    self._matrix_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')

    # atten_dim = self._encoding_dim * 4 + 600 if ablation_type == 'attn_over_rels' else self._encoding_dim * 4
    atten_dim = self._encoding_dim * 4 + 600
    self._merge_atten = TimeDistributed(torch.nn.Linear(atten_dim, self._encoding_dim))

    self._residual_encoder = residual_encoder
    self._self_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')
    self._merge_self_attention = TimeDistributed(torch.nn.Linear(self._encoding_dim * 3, self._encoding_dim))

    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder

    self._span_start_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1))
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1))
    self._squad_metrics = SquadEmAndF1()
    initializer(self)

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._official_em = Average()
    self._official_f1 = Average()
    self._span_accuracy = BooleanAccuracy()
    self._variational_dropout = InputVariationalDropout(dropout)

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             text_field_embedder_elmo: TextFieldEmbedder,
             num_highway_layers: int,
             highway_dim: int,
             highway_elmo_dim: int,
             phrase_layer: Seq2SeqEncoder,
             soft_align_matrix_attention: SoftAlignmentMatrixAttention,
             self_matrix_attention: BilinearMatrixAttention,
             passage_modeling_layer: Seq2SeqEncoder,
             question_modeling_layer: Seq2SeqEncoder,
             question_encoding_layer: Seq2VecEncoder,
             passage_similarity_function: SimilarityFunction,
             question_similarity_function: SimilarityFunction,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(MultiGranuFusionElmo, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._text_field_embedder_elmo = text_field_embedder_elmo
    self._highway_layer = TimeDistributed(Highway(highway_dim, num_highway_layers))
    self._highway_elmo_layer = TimeDistributed(Highway(highway_elmo_dim, num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = soft_align_matrix_attention
    self._self_matrix_attention = self_matrix_attention
    self._passage_modeling_layer = passage_modeling_layer
    self._question_modeling_layer = question_modeling_layer
    self._question_encoding_layer = question_encoding_layer
    self._passage_similarity_function = passage_similarity_function
    self._question_similarity_function = question_similarity_function

    passage_modeling_output_dim = self._passage_modeling_layer.get_output_dim()
    question_modeling_output_dim = self._question_modeling_layer.get_output_dim()
    encoding_dim = phrase_layer.get_output_dim() + text_field_embedder_elmo.get_output_dim()

    self._passage_fusion_weight = nn.Linear(encoding_dim * 4, encoding_dim)
    self._question_fusion_weight = nn.Linear(encoding_dim * 4, encoding_dim)
    self._fusion_weight = nn.Linear(encoding_dim * 4, encoding_dim)
    self._span_start_weight = nn.Linear(passage_modeling_output_dim, question_modeling_output_dim)
    self._span_end_weight = nn.Linear(passage_modeling_output_dim, question_modeling_output_dim)
    self._span_weight = torch.FloatTensor([0.1, 1])
    self._span_predictor = TimeDistributed(torch.nn.Linear(self._passage_modeling_layer.get_output_dim(), 2))

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    initializer(self)

def __init__(self, input_dim):
    super(FusionLayer, self).__init__()
    self._input_dim = input_dim
    self._tanh = nn.Tanh()
    self._sigmoid = nn.Sigmoid()
    self._fusion_m = TimeDistributed(Linear(in_features=4 * input_dim, out_features=input_dim))
    self._fusion_g = TimeDistributed(Linear(in_features=4 * input_dim, out_features=1))

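# The forward pass of FusionLayer is not shown in this file. A common gated-fusion formulation
# that is consistent with the two projections above (a 4 * input_dim candidate projection with
# tanh, plus a 4 * input_dim scalar gate with sigmoid) is sketched below; the input names and
# the concatenation order [x, y, x * y, x - y] are assumptions, not the author's code.
def forward(self, x, y):
    # x, y: (batch, seq_len, input_dim)
    fused_input = torch.cat([x, y, x * y, x - y], dim=-1)   # (batch, seq_len, 4 * input_dim)
    m = self._tanh(self._fusion_m(fused_input))             # candidate, (batch, seq_len, input_dim)
    g = self._sigmoid(self._fusion_g(fused_input))          # gate, (batch, seq_len, 1)
    return g * m + (1 - g) * x                               # gated mix of candidate and original input
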
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             attention_similarity_function: SimilarityFunction,
             residual_encoder: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(ModelMSMARCO, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer
    self._residual_encoder = residual_encoder
    self._span_end_encoder = span_end_encoder
    self._span_start_encoder = span_start_encoder

    encoding_dim = phrase_layer.get_output_dim()
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))
    span_end_encoding_dim = span_end_encoder.get_output_dim()
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))
    self._no_answer_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))

    self._matrix_attention = TriLinearAttention(encoding_dim)
    self._self_matrix_attention = TriLinearAttention(encoding_dim)
    self._linear_layer = TimeDistributed(torch.nn.Linear(4 * encoding_dim, encoding_dim))
    self._residual_linear_layer = TimeDistributed(torch.nn.Linear(3 * encoding_dim, encoding_dim))

    # self._w_x = torch.nn.Parameter(torch.Tensor(encoding_dim))
    # self._w_y = torch.nn.Parameter(torch.Tensor(encoding_dim))
    # self._w_xy = torch.nn.Parameter(torch.Tensor(encoding_dim))
    # std = math.sqrt(6 / (encoding_dim + 1))
    # self._w_x.data.uniform_(-std, std)
    # self._w_y.data.uniform_(-std, std)
    # self._w_xy.data.uniform_(-std, std)

    self._squad_metrics = SquadEmAndF1()
    self._rouge_metric = Rouge()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    initializer(self)
    self._ite = 0

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             attention_similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             evaluation_json_file: str = None) -> None:
    super(BidirectionalAttentionFlow, self).__init__(vocab)

    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = MatrixAttention(attention_similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))

    initializer(self)

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._official_em = Average()
    self._official_f1 = Average()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    if evaluation_json_file:
        logger.info("Prepping official evaluation dataset from %s", evaluation_json_file)
        with open(evaluation_json_file) as dataset_file:
            dataset_json = json.load(dataset_file)
        question_to_answers = {}
        for article in dataset_json['data']:
            for paragraph in article['paragraphs']:
                for question in paragraph['qas']:
                    question_id = question['id']
                    answers = [answer['text'] for answer in question['answers']]
                    question_to_answers[question_id] = answers
        self._official_eval_dataset = question_to_answers
    else:
        self._official_eval_dataset = None

def __init__(self, embA_size: int, embB_size: int, hidden_dim: int):
    super(SpanRepAssembly, self).__init__()
    self.embA_size = embA_size
    self.embB_size = embB_size
    self.hidden_dim = hidden_dim
    self.hiddenA = TimeDistributed(Linear(embA_size, hidden_dim))
    self.hiddenB = TimeDistributed(Linear(embB_size, hidden_dim, bias=False))

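# SpanRepAssembly's forward is not included here. One plausible reading of the two projections
# (and of bias=False on hiddenB, which avoids adding a bias twice) is a broadcast sum that
# builds a representation for every (a, b) pair. This is an assumed sketch only:
def forward(self, embA, embB):
    # embA: (batch, lenA, embA_size), embB: (batch, lenB, embB_size)
    projA = self.hiddenA(embA)   # (batch, lenA, hidden_dim), carries the bias
    projB = self.hiddenB(embB)   # (batch, lenB, hidden_dim), bias-free
    # Broadcast-add to form (batch, lenA, lenB, hidden_dim).
    return projA.unsqueeze(2) + projB.unsqueeze(1)
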
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    context_layer: Seq2SeqEncoder,
    mention_feedforward: FeedForward,
    antecedent_feedforward: FeedForward,
    feature_size: int,
    max_span_width: int,
    spans_per_word: float,
    max_antecedents: int,
    lexical_dropout: float = 0.2,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)
    feedforward_scorer = torch.nn.Sequential(
        TimeDistributed(mention_feedforward),
        TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)),
    )
    self._mention_pruner = Pruner(feedforward_scorer)
    self._antecedent_scorer = TimeDistributed(
        torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1)
    )

    self._endpoint_span_extractor = EndpointSpanExtractor(
        context_layer.get_output_dim(),
        combination="x,y",
        num_width_embeddings=max_span_width,
        span_width_embedding_dim=feature_size,
        bucket_widths=False,
    )
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(
        input_dim=text_field_embedder.get_output_dim()
    )

    # 10 possible distance buckets.
    self._num_distance_buckets = 10
    self._distance_embedding = Embedding(self._num_distance_buckets, feature_size)

    self._max_span_width = max_span_width
    self._spans_per_word = spans_per_word
    self._max_antecedents = max_antecedents

    self._mention_recall = MentionRecall()
    self._conll_coref_scores = ConllCorefScores()

    if lexical_dropout > 0:
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
    else:
        self._lexical_dropout = lambda x: x
    initializer(self)

def __init__(self,
             vocab: Vocabulary,
             source_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             max_decoding_steps: int,
             spans_per_word: float,
             target_namespace: str = "tokens",
             target_embedding_dim: int = None,
             attention_function: SimilarityFunction = None,
             scheduled_sampling_ratio: float = 0.0,
             spans_extractor: SpanExtractor = None,
             spans_scorer_feedforward: FeedForward = None) -> None:
    super(SpanAe, self).__init__(vocab)

    self._source_embedder = source_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._target_namespace = target_namespace
    self._attention_function = attention_function
    self._scheduled_sampling_ratio = scheduled_sampling_ratio

    # We need the start symbol to provide as the input at the first timestep of decoding, and
    # end symbol as a way to indicate the end of the decoded sequence.
    self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
    self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)
    num_classes = self.vocab.get_vocab_size(self._target_namespace)

    # Decoder output dim needs to be the same as the encoder output dim since we initialize the
    # hidden state of the decoder with that of the final hidden states of the encoder. Also, if
    # we're using attention with ``DotProductSimilarity``, this is needed.
    self._decoder_output_dim = self._encoder.get_output_dim() + 1
    target_embedding_dim = target_embedding_dim or self._source_embedder.get_output_dim()
    self._target_embedder = Embedding(num_classes, target_embedding_dim)

    if self._attention_function:
        self._decoder_attention = Attention(self._attention_function)
        # The output of attention, a weighted average over encoder outputs, will be
        # concatenated to the input vector of the decoder at each time step.
        self._decoder_input_dim = self._encoder.get_output_dim() + target_embedding_dim
    else:
        self._decoder_input_dim = target_embedding_dim

    self._decoder_cell = LSTMCell(self._decoder_input_dim + 1, self._decoder_output_dim)
    self._output_projection_layer = Linear(self._decoder_output_dim, num_classes)

    self._span_extractor = spans_extractor
    feedforward_scorer = torch.nn.Sequential(
        TimeDistributed(spans_scorer_feedforward),
        TimeDistributed(torch.nn.Linear(spans_scorer_feedforward.get_output_dim(), 1)))
    self._span_pruner = SpanPruner(feedforward_scorer)
    self._spans_per_word = spans_per_word

def __init__(self,
             vocab: Vocabulary,
             span_encoder: Seq2SeqEncoder,
             reasoning_encoder: Seq2SeqEncoder,
             input_dropout: float = 0.3,
             hidden_dim_maxpool: int = 1024,
             class_embs: bool = True,
             reasoning_use_obj: bool = True,
             reasoning_use_answer: bool = True,
             reasoning_use_question: bool = True,
             pool_reasoning: bool = True,
             pool_answer: bool = True,
             pool_question: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             ):
    super(AttentionQA, self).__init__(vocab)

    self.detector = SimpleDetector(pretrained=True, average_pool=True, semantic=class_embs, final_dim=512)
    ###################################################################################################

    self.rnn_input_dropout = TimeDistributed(InputVariationalDropout(input_dropout)) if input_dropout > 0 else None

    self.span_encoder = TimeDistributed(span_encoder)
    self.reasoning_encoder = TimeDistributed(reasoning_encoder)

    self.span_attention = BilinearMatrixAttention(
        matrix_1_dim=span_encoder.get_output_dim(),
        matrix_2_dim=span_encoder.get_output_dim(),
    )
    self.obj_attention = BilinearMatrixAttention(
        matrix_1_dim=span_encoder.get_output_dim(),
        matrix_2_dim=self.detector.final_dim,
    )

    self.reasoning_use_obj = reasoning_use_obj
    self.reasoning_use_answer = reasoning_use_answer
    self.reasoning_use_question = reasoning_use_question
    self.pool_reasoning = pool_reasoning
    self.pool_answer = pool_answer
    self.pool_question = pool_question
    dim = sum([d for d, to_pool in [(reasoning_encoder.get_output_dim(), self.pool_reasoning),
                                    (span_encoder.get_output_dim(), self.pool_answer),
                                    (span_encoder.get_output_dim(), self.pool_question)] if to_pool])

    self.final_mlp = torch.nn.Sequential(
        torch.nn.Dropout(input_dropout, inplace=False),
        torch.nn.Linear(dim, hidden_dim_maxpool),
        torch.nn.ReLU(inplace=True),
        torch.nn.Dropout(input_dropout, inplace=False),
        torch.nn.Linear(hidden_dim_maxpool, 1),
    )
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)

def make_pruner(scorer, entity_beam, gold_beam):
    """
    Create a pruner that either takes outputs of other scorers (i.e. entity beam), or uses its
    own scorer (the `default_scorer`).
    """
    item_scorer = torch.nn.Sequential(
        TimeDistributed(scorer),
        TimeDistributed(torch.nn.Linear(scorer.get_output_dim(), 1)))
    min_score_to_keep = 1e-10 if entity_beam else None

    return Pruner(item_scorer, entity_beam, gold_beam, min_score_to_keep)

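# A minimal usage sketch for make_pruner, assuming a FeedForward span scorer of the same shape
# used elsewhere in this file; the dimensions and variable names are illustrative only.
from allennlp.modules import FeedForward

span_scorer = FeedForward(input_dim=768, num_layers=2, hidden_dims=150,
                          activations=torch.nn.ReLU())

# Default behaviour: the pruner trains and uses its own scorer.
default_pruner = make_pruner(span_scorer, entity_beam=False, gold_beam=False)

# Entity-beam variant: external scores decide what is kept, and the tiny
# min_score_to_keep (1e-10) filters out items scored at exactly zero.
entity_beam_pruner = make_pruner(span_scorer, entity_beam=True, gold_beam=False)
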
def __init__(self, hidden_size, drop_prob):
    super(SelfAtt, self).__init__()
    self.drop_prob = drop_prob
    self.att_wrapper = TimeDistributed(nn.Linear(hidden_size * 4, hidden_size))
    self.trilinear = TriLinearAttention(hidden_size)
    self.self_att_upsampler = TimeDistributed(nn.Linear(hidden_size * 3, hidden_size * 4))
    self.enc = nn.GRU(hidden_size, hidden_size // 2, 1, batch_first=True, bidirectional=True)
    self.hidden_size = hidden_size

def __init__(self,
             vocab: Vocabulary,
             mention_feedforward: FeedForward,
             relation_feedforward: FeedForward,
             spans_per_word: float,
             span_emb_dim: int,
             use_biaffine_rel: bool,
             rel_prop: int = 0,
             rel_prop_dropout_A: float = 0.0,
             rel_prop_dropout_f: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             positive_label_weight: float = 1.0,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(RelationExtractor, self).__init__(vocab, regularizer)

    self._n_labels = max(vocab.get_vocab_size("relation_labels"), 1)

    feedforward_scorer = torch.nn.Sequential(
        TimeDistributed(mention_feedforward),
        TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)))
    self._mention_pruner = Pruner(feedforward_scorer)

    # Relation scorer.
    self._use_biaffine_rel = use_biaffine_rel
    if self._use_biaffine_rel:
        self._biaffine = torch.nn.ModuleList()
        for _ in range(self._n_labels):
            self._biaffine.append(torch.nn.Linear(span_emb_dim, span_emb_dim))
    else:
        self._relation_feedforward = relation_feedforward
        self._relation_scorer = torch.nn.Linear(relation_feedforward.get_output_dim(), self._n_labels)

    self._spans_per_word = spans_per_word

    self._relation_metrics = RelationMetrics1()

    class_weights = torch.cat([torch.tensor([1.0]),
                               positive_label_weight * torch.ones(self._n_labels)])
    self._loss = torch.nn.CrossEntropyLoss(reduction="sum", ignore_index=-1, weight=class_weights)

    self.rel_prop = rel_prop

    # Relation Propagation
    self._A_network = FeedForward(input_dim=self._n_labels,
                                  num_layers=1,
                                  hidden_dims=span_emb_dim,
                                  activations=lambda x: x,
                                  dropout=rel_prop_dropout_A)
    self._f_network = FeedForward(input_dim=2 * span_emb_dim,
                                  num_layers=1,
                                  hidden_dims=span_emb_dim,
                                  activations=torch.nn.Sigmoid(),
                                  dropout=rel_prop_dropout_f)

    initializer(self)

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             preload_path: Optional[str] = None) -> None:
    super(DecomposableAttention, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = MatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder

    # self._num_labels = vocab.get_vocab_size(namespace="labels")

    check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                           "text field embedding dim", "attend feedforward input dim")
    # check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels,
    #                        "final output dimension", "number of labels")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    initializer(self)

    # Do we want to initialize with the SNLI stuff? let's say yes.
    # 'snli-decomposable-attention/weights.th'
    if preload_path is not None:
        logger.info("Preloading!")
        preload = torch.load(preload_path)
        own_state = self.state_dict()
        for name, param in preload.items():
            if name not in own_state:
                logger.info("Unexpected key {} in state_dict with size {}".format(name, param.size()))
            elif param.size() == own_state[name].size():
                own_state[name].copy_(param)
            else:
                logger.info("Network has {} with size {}, ckpt has {}".format(
                    name, own_state[name].size(), param.size()))
        missing = set(own_state.keys()) - set(preload.keys())
        if len(missing) > 0:
            logger.info("We couldn't find {}".format(','.join(missing)))

def __init__(self,
             vocab: Vocabulary,
             span_typer: SpanTyper,
             embed_size: int,
             label_namespace: str = 'span_labels',
             event_namespace: str = 'event_labels'):
    super(ArgumentSpanClassifier, self).__init__()

    self.vocab: Vocabulary = vocab
    self.label_namespace: str = label_namespace
    self.event_namespace: str = event_namespace
    self.embed_size = embed_size
    self.event_embedding_size = 50

    self.event_embeddings: nn.Embedding = nn.Embedding(
        num_embeddings=len(vocab.get_token_to_index_vocabulary(namespace=event_namespace)),
        embedding_dim=self.event_embedding_size)

    self.lexical_dropout = nn.Dropout(p=0.2)
    self.span_extractor: SpanExtractor = EndpointSpanExtractor(input_dim=self.embed_size, combination='x,y')
    self.attentive_span_extractor: SpanExtractor = SelfAttentiveSpanExtractor(embed_size)

    self.arg_affine = TimeDistributed(
        FeedForward(input_dim=self.span_extractor.get_output_dim() +
                    self.attentive_span_extractor.get_output_dim(),
                    hidden_dims=self.embed_size,
                    num_layers=2,
                    activations=nn.GELU(),
                    dropout=0.2))
    self.trigger_affine = FeedForward(
        input_dim=self.span_extractor.get_output_dim() + self.attentive_span_extractor.get_output_dim(),
        hidden_dims=self.embed_size - self.event_embedding_size,
        num_layers=2,
        activations=nn.GELU(),
        dropout=0.2)
    self.trigger_event_infusion = TimeDistributed(
        FeedForward(input_dim=2 * self.embed_size,
                    hidden_dims=self.embed_size,
                    num_layers=2,
                    activations=nn.GELU(),
                    dropout=0.2))

    self.span_typer: SpanTyper = span_typer

    self.apply(self._init_weights)

def __init__(self,
             vocab,
             text_field_embedder,
             span_extractor,
             encoder,
             feedforward=None,
             pos_tag_embedding=None,
             initializer=InitializerApplicator(),
             regularizer=None,
             evalb_directory_path=DEFAULT_EVALB_DIR):
    super(SpanConstituencyParser, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.span_extractor = span_extractor
    self.num_classes = self.vocab.get_vocab_size(u"labels")
    self.encoder = encoder
    self.feedforward_layer = TimeDistributed(feedforward) if feedforward else None
    self.pos_tag_embedding = pos_tag_embedding or None
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = span_extractor.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_classes))

    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim,
                           encoder.get_input_dim(),
                           u"representation dim (tokens + optional POS tags)",
                           u"encoder input dim")
    check_dimensions_match(encoder.get_output_dim(),
                           span_extractor.get_input_dim(),
                           u"encoder output dim",
                           u"span extractor input dim")
    if feedforward is not None:
        check_dimensions_match(span_extractor.get_output_dim(),
                               feedforward.get_input_dim(),
                               u"span extractor output dim",
                               u"feedforward input dim")
    self.tag_accuracy = CategoricalAccuracy()

    if evalb_directory_path is not None:
        self._evalb_score = EvalbBracketingScorer(evalb_directory_path)
    else:
        self._evalb_score = None
    initializer(self)

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             highway_embedding_size: int,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             modeling_layer: Seq2SeqEncoder,
             span_end_lstm: Seq2SeqEncoder,
             language: str = 'en',
             ptr_dim: int = 200,
             dropout: float = 0.2,
             max_num_passages: int = 5,
             max_num_character: int = 4,
             loss_ratio: float = 0.1,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)

    self._span_end_encoder = span_end_lstm
    self.loss_ratio = loss_ratio
    self.language = language
    self.max_num_character = max_num_character
    self.relu = torch.nn.ReLU()
    self.max_num_passages = max_num_passages
    self.ptr_dim = ptr_dim
    self.decay = 1.0

    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        ElasticHighway(text_field_embedder.get_output_dim(), highway_embedding_size, num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = DotProductMatrixAttention()
    self._modeling_layer = modeling_layer

    modeling_dim = modeling_layer.get_output_dim()
    encoding_dim = phrase_layer.get_output_dim()

    self._ptr_layer_1 = TimeDistributed(torch.nn.Linear(encoding_dim * 4 + modeling_dim, 1))
    self._ptr_layer_2 = TimeDistributed(torch.nn.Linear(encoding_dim * 4 + modeling_dim, 1))

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._rouge_metrics = MsmarcoRouge()
    self._bleu_metrics = BLEU()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    initializer(self)

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2VecEncoder,
             answers_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._classifier_feedforward = classifier_feedforward
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer

    encoding_dim = phrase_layer.get_output_dim()

    self._time_distributed_highway_layer = TimeDistributed(self._highway_layer)
    self._answers_encoder = TimeDistributed(answers_encoder)

    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(),
                           "text field embedder output dim", "phrase layer input dim")

    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    self.loss = torch.nn.CrossEntropyLoss()

    initializer(self)

def __init__(self,
             vocab,
             text_field_embedder,
             phrase_layer,
             residual_encoder,
             span_start_encoder,
             span_end_encoder,
             initializer,
             dropout=0.2,
             mask_lstms=True):
    super(BiDAFSelfAttention, self).__init__(vocab)

    # Initialize layers.
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer

    # Initialize start/end span predictors.
    encoding_dim = phrase_layer.get_output_dim()
    self._matrix_attention = TriLinearAttention(encoding_dim)
    self._merge_atten = TimeDistributed(torch.nn.Linear(encoding_dim * 4, encoding_dim))

    self._residual_encoder = residual_encoder
    self._self_atten = TriLinearAttention(encoding_dim)
    self._merge_self_atten = TimeDistributed(torch.nn.Linear(encoding_dim * 3, encoding_dim))

    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder

    self._span_start_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))

    initializer(self)

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    self._official_em = Average()
    self._official_f1 = Average()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
        # self._dropout = VariationalDropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

def __init__(self,
             vocab: Vocabulary,
             task: str,
             encoder: Seq2SeqEncoder,
             label_smoothing: float = 0.0,
             dropout: float = 0.0,
             adaptive: bool = False,
             features: List[str] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(TagDecoder, self).__init__(vocab, regularizer)

    self.task = task
    self.encoder = encoder
    self.output_dim = encoder.get_output_dim()
    self.label_smoothing = label_smoothing
    self.num_classes = self.vocab.get_vocab_size(task)
    self.adaptive = adaptive
    self.features = features if features else []

    self.metrics = {
        "acc": CategoricalAccuracy(),
        # "acc3": CategoricalAccuracy(top_k=3)
    }

    if self.adaptive:
        # TODO
        adaptive_cutoffs = [round(self.num_classes / 15), 3 * round(self.num_classes / 15)]
        self.task_output = AdaptiveLogSoftmaxWithLoss(self.output_dim,
                                                      self.num_classes,
                                                      cutoffs=adaptive_cutoffs,
                                                      div_value=4.0)
    else:
        self.task_output = TimeDistributed(Linear(self.output_dim, self.num_classes))

    self.feature_outputs = torch.nn.ModuleDict()
    self.features_metrics = {}
    for feature in self.features:
        self.feature_outputs[feature] = TimeDistributed(Linear(self.output_dim,
                                                               vocab.get_vocab_size(feature)))
        self.features_metrics[feature] = {
            "acc": CategoricalAccuracy(),
        }

    initializer(self)

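# The adaptive branch above relies on torch.nn.AdaptiveLogSoftmaxWithLoss, whose forward takes
# (input, target) and returns per-target log-probabilities plus a loss instead of plain logits,
# so the two branches cannot be used interchangeably downstream. A minimal sketch of that API
# (the sizes and the flattening to 2D are illustrative assumptions):
import torch

adaptive_head = torch.nn.AdaptiveLogSoftmaxWithLoss(
    in_features=256, n_classes=1000, cutoffs=[66, 198], div_value=4.0)

hidden = torch.randn(8 * 20, 256)             # (batch * seq_len, hidden); inputs must be 2D
targets = torch.randint(0, 1000, (8 * 20,))   # flattened gold labels

out = adaptive_head(hidden, targets)
print(out.output.shape)   # log-probability of each token's gold label, shape (160,)
print(out.loss)           # mean negative log-likelihood

log_probs = adaptive_head.log_prob(hidden)    # full (160, 1000) log-probabilities for prediction
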
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             judge: Model = None,
             update_judge: bool = False,
             reward_method: str = None,
             detach_value_head: bool = False,
             qa_loss_weight: float = 0.,
             influence_reward: bool = False,
             theory_of_mind: bool = False) -> None:
    super(BertMC, self).__init__(vocab, regularizer)

    self.judge = judge
    self.is_judge = self.judge is None
    self.reward_method = None if self.is_judge else reward_method
    self.update_judge = update_judge and (self.judge is not None)
    self._detach_value_head = detach_value_head
    self._qa_loss_weight = qa_loss_weight
    self.influence_reward = influence_reward
    self.theory_of_mind = theory_of_mind
    self._text_field_embedder = text_field_embedder
    self._hidden_dim = text_field_embedder.get_output_dim()
    self.answer_type = 'mc'
    self.output_type = 'mc'
    self._config = self._text_field_embedder.token_embedder_tokens._modules['bert_model'].config

    if not self.is_judge:
        self._sent_chosen_embeddings = torch.nn.Embedding(2, self._config.hidden_size)
        self._sent_chosen_embeddings.weight.data *= 0  # Init to zero to minimally affect BERT at start
        self._policy_head = TimeDistributed(torch.nn.Linear(self._hidden_dim, 1))  # Can make MLP
        self._value_head = TimeDistributed(torch.nn.Linear(self._hidden_dim, 1))  # Can make MLP
        self._turn_film_gen = torch.nn.Linear(1, 2 * self._hidden_dim)
        self._film = FiLM()
        if self.theory_of_mind:
            final_blocks_config = deepcopy(self._config)
            final_blocks_config.num_hidden_layers = 1
            self.final_blocks_input_proj = TimeDistributed(
                torch.nn.Linear(self._hidden_dim * 2, self._hidden_dim))
            self.final_blocks = BertEncoder(final_blocks_config)

    # NOTE: Rename to self._accuracy (may break model loading)
    self._span_start_accuracy = CategoricalAccuracy()
    self._initializer = initializer

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             span_extractor: SpanExtractor,
             feedforward: FeedForward,
             ner_threshold: float = 0.65,
             max_inner_range: float = 18,
             metadata: List[Dict[str, Any]] = None,
             label_namespace: str = "ner_labels",
             regularizer: Optional[RegularizerApplicator] = None,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super(NERTagger, self).__init__(vocab, regularizer)

    self._include_trigger = False
    for label in vocab.get_token_to_index_vocabulary(label_namespace):
        if "trigger" in label:
            self._include_trigger = True

    self.label_namespace = label_namespace
    self._n_labels = self.vocab.get_vocab_size(label_namespace)

    # null_label = vocab.get_token_index("", label_namespace)
    # assert null_label == 0

    self._ner_threshold = ner_threshold
    self._max_inner_range = max_inner_range
    self._ner_scorer = torch.nn.ModuleDict()
    self._text_field_embedder = text_field_embedder
    self._span_extractor = span_extractor

    self._ner_scorer = torch.nn.Sequential(
        TimeDistributed(feedforward),
        TimeDistributed(torch.nn.Linear(feedforward.get_output_dim(), self._n_labels)))

    self._relation_f1_metric = RelationMetric(
        vocab, tag_namespace=label_namespace,
    )
    self._ner_metric = NERMetrics(self._n_labels)
    self._relation_metric = SpanRelationMetric()

    self._loss = torch.nn.BCEWithLogitsLoss(reduction="sum")

    initializer(self)

def __init__(self,
             vocab: Vocabulary,
             make_feedforward: Callable,
             span_emb_dim: int,
             feature_size: int,
             spans_per_word: float,
             positive_label_weight: float = 1.0,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)

    self._namespaces = [entry for entry in vocab.get_namespaces() if "relation_labels" in entry]
    self._n_labels = {name: vocab.get_vocab_size(name) for name in self._namespaces}

    self._mention_pruners = torch.nn.ModuleDict()
    self._relation_feedforwards = torch.nn.ModuleDict()
    self._relation_scorers = torch.nn.ModuleDict()
    self._relation_metrics = {}

    for namespace in self._namespaces:
        mention_feedforward = make_feedforward(input_dim=span_emb_dim)
        feedforward_scorer = torch.nn.Sequential(
            TimeDistributed(mention_feedforward),
            TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)))
        self._mention_pruners[namespace] = Pruner(feedforward_scorer)

        relation_scorer_dim = 3 * span_emb_dim
        relation_feedforward = make_feedforward(input_dim=relation_scorer_dim)
        self._relation_feedforwards[namespace] = relation_feedforward
        relation_scorer = torch.nn.Linear(relation_feedforward.get_output_dim(), self._n_labels[namespace])
        self._relation_scorers[namespace] = relation_scorer

        self._relation_metrics[namespace] = RelationMetrics()

    self._spans_per_word = spans_per_word
    self._active_namespace = None

    self._loss = torch.nn.CrossEntropyLoss(reduction="sum", ignore_index=-1)

def __init__(self,
             num_turns: int,
             combination: str,
             qq_attention: MatrixAttention,
             qa_attention: MatrixAttention,
             coref_layer: Seq2SeqEncoder,
             use_ling: bool = False,
             ling_features_size: int = 0,
             use_mention_score=False,
             use_antecedent_score=False):
    super(BiAttContext_MultiTurn, self).__init__()

    self.num_turns = num_turns
    self.combination = combination
    self.qq_attention = qq_attention
    self.qa_attention = qa_attention
    self._coref_layer = coref_layer
    self.use_ling = True
    coref_output_dim = self._coref_layer.get_output_dim()
    coref_input_dim = self._coref_layer.get_input_dim()
    self.use_ling = use_ling
    if self.use_ling:
        self._coref_proj = TimeDistributed(
            torch.nn.Linear(coref_output_dim + ling_features_size, coref_output_dim))

    if use_mention_score:
        self.mention_score = TimeDistributed(torch.nn.Linear(coref_output_dim, 1, bias=False))
    else:
        self.mention_score = None

    if use_antecedent_score:
        self.antecedent_score = TimeDistributed(
            torch.nn.Sequential(torch.nn.Linear(coref_output_dim, 1), torch.nn.Sigmoid()))
    else:
        self.antecedent_score = None

    if self.combination == 'entropy+exponential':
        if torch.cuda.is_available():
            self.entropy_combination_weight = torch.nn.Parameter(torch.cuda.FloatTensor(1),
                                                                 requires_grad=True)
        else:
            self.entropy_combination_weight = torch.nn.Parameter(torch.FloatTensor(1),
                                                                 requires_grad=True)

    self.q_hat_enc = TimeDistributed(torch.nn.Linear(coref_input_dim * 3, coref_input_dim))

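# Note on the 'entropy+exponential' branch above: building the parameter from
# torch.cuda.FloatTensor(1) bakes the device into __init__ and leaves its value uninitialized.
# A device-agnostic sketch (assuming the module is later moved with .to(device) as usual, and
# choosing zero-initialization rather than uninitialized memory) would be:
if self.combination == 'entropy+exponential':
    # nn.Module.to(device) / .cuda() moves Parameters along with the rest of the model,
    # so no CUDA special-casing is needed here.
    self.entropy_combination_weight = torch.nn.Parameter(torch.zeros(1))
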
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             parser_model_path: str,
             parser_cuda_device: int,
             freeze_parser: bool,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SyntacticEntailment, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._attention = LegacyMatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder

    self._num_labels = vocab.get_vocab_size(namespace="labels")

    check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                           "text field embedding dim", "attend feedforward input dim")
    check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels,
                           "final output dimension", "number of labels")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    self._parser = load_archive(parser_model_path, cuda_device=parser_cuda_device).model
    self._parser._head_sentinel.requires_grad = False
    for child in self._parser.children():
        for param in child.parameters():
            param.requires_grad = False
    if not freeze_parser:
        for param in self._parser.encoder.parameters():
            param.requires_grad = True

    initializer(self)

def __init__(self,
             vocab: Vocabulary,
             sentence_encoder: SentenceEncoder,
             qarg_ffnn: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None):
    super(ClauseAndSpanToAnswerSlotModel, self).__init__(vocab, regularizer)

    self._sentence_encoder = sentence_encoder
    self._qarg_ffnn = qarg_ffnn

    self._clause_embedding = Embedding(vocab.get_vocab_size("abst-clause-labels"),
                                       self._qarg_ffnn.get_input_dim())
    self._span_extractor = EndpointSpanExtractor(input_dim=self._sentence_encoder.get_output_dim(),
                                                 combination="x,y")
    self._span_hidden = TimeDistributed(Linear(2 * self._sentence_encoder.get_output_dim(),
                                               self._qarg_ffnn.get_input_dim()))
    self._predicate_hidden = Linear(self._sentence_encoder.get_output_dim(),
                                    self._qarg_ffnn.get_input_dim())
    self._qarg_predictor = Linear(self._qarg_ffnn.get_output_dim(),
                                  self.vocab.get_vocab_size("qarg-labels"))
    self._metric = BinaryF1()

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             binary_feature_dim: int,
             embedding_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             label_smoothing: float = None,
             ignore_span_metric: bool = False) -> None:
    super(SemanticRoleLabeler, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")

    # For the span based evaluation, we don't want to consider labels
    # for verb, because the verb index is provided to the model.
    self.span_metric = SpanBasedF1Measure(vocab, tag_namespace="labels", ignore_classes=["V"])

    self.encoder = encoder
    # There are exactly 2 binary features for the verb predicate embedding.
    self.binary_feature_embedding = Embedding(2, binary_feature_dim)
    self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(), self.num_classes))
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self._label_smoothing = label_smoothing
    self.ignore_span_metric = ignore_span_metric

    check_dimensions_match(text_field_embedder.get_output_dim() + binary_feature_dim,
                           encoder.get_input_dim(),
                           "text embedding dim + verb indicator embedding dim",
                           "encoder input dim")
    initializer(self)

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             stacked_encoder: Seq2SeqEncoder,
             binary_feature_dim: int,
             initializer: InitializerApplicator,
             embedding_dropout: float = 0.0) -> None:
    super(SemanticRoleLabeler, self).__init__(vocab)

    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")

    # For the span based evaluation, we don't want to consider labels
    # for verb, because the verb index is provided to the model.
    self.span_metric = SpanBasedF1Measure(vocab, tag_namespace="labels", ignore_classes=["V"])

    self.stacked_encoder = stacked_encoder
    # There are exactly 2 binary features for the verb predicate embedding.
    self.binary_feature_embedding = Embedding(2, binary_feature_dim)
    self.tag_projection_layer = TimeDistributed(Linear(self.stacked_encoder.get_output_dim(), self.num_classes))
    self.embedding_dropout = Dropout(p=embedding_dropout)
    initializer(self)

    if text_field_embedder.get_output_dim() + binary_feature_dim != stacked_encoder.get_input_dim():
        raise ConfigurationError(
            "The SRL Model uses a binary verb indicator feature, meaning "
            "the input dimension of the stacked_encoder must be equal to "
            "the output dimension of the text_field_embedder + binary_feature_dim.")
