def __init__(self,
             encoder_output_dim: int,
             action_embedding_dim: int,
             input_attention: Attention,
             activation: Activation = Activation.by_name('relu')(),
             predict_start_type_separately: bool = True,
             num_start_types: int = None,
             add_action_bias: bool = True,
             mixture_feedforward: FeedForward = None,
             dropout: float = 0.0) -> None:
    super().__init__(encoder_output_dim=encoder_output_dim,
                     action_embedding_dim=action_embedding_dim,
                     input_attention=input_attention,
                     num_start_types=num_start_types,
                     activation=activation,
                     predict_start_type_separately=predict_start_type_separately,
                     add_action_bias=add_action_bias,
                     dropout=dropout)
    self._linked_checklist_multiplier = Parameter(torch.FloatTensor([1.0]))
    self._mixture_feedforward = mixture_feedforward
    if mixture_feedforward is not None:
        check_dimensions_match(encoder_output_dim, mixture_feedforward.get_input_dim(),
                               "hidden state embedding dim", "mixture feedforward input dim")
        check_dimensions_match(mixture_feedforward.get_output_dim(), 1,
                               "mixture feedforward output dim", "dimension for scalar value")
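# The constructors in this collection lean heavily on `check_dimensions_match` to fail fast
# on mis-sized configurations. A minimal sketch of the contract it enforces, assuming
# AllenNLP's `allennlp.common.checks` module (the dimension values are illustrative only):
from allennlp.common.checks import ConfigurationError, check_dimensions_match

check_dimensions_match(300, 300, "encoder output dim", "feedforward input dim")  # passes silently
try:
    check_dimensions_match(300, 200, "encoder output dim", "feedforward input dim")
except ConfigurationError as error:
    print(error)  # the message names both dimensions and their mismatched sizes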
def from_params(cls, vocab: Vocabulary, params: Params) -> 'DecomposableAttention':
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)
    premise_encoder_params = params.pop("premise_encoder", None)
    if premise_encoder_params is not None:
        premise_encoder = Seq2SeqEncoder.from_params(premise_encoder_params)
    else:
        premise_encoder = None
    hypothesis_encoder_params = params.pop("hypothesis_encoder", None)
    if hypothesis_encoder_params is not None:
        hypothesis_encoder = Seq2SeqEncoder.from_params(hypothesis_encoder_params)
    else:
        hypothesis_encoder = None
    attend_feedforward = FeedForward.from_params(params.pop('attend_feedforward'))
    similarity_function = SimilarityFunction.from_params(params.pop("similarity_function"))
    compare_feedforward = FeedForward.from_params(params.pop('compare_feedforward'))
    aggregate_feedforward = FeedForward.from_params(params.pop('aggregate_feedforward'))
    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
    params.assert_empty(cls.__name__)
    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               attend_feedforward=attend_feedforward,
               similarity_function=similarity_function,
               compare_feedforward=compare_feedforward,
               aggregate_feedforward=aggregate_feedforward,
               premise_encoder=premise_encoder,
               hypothesis_encoder=hypothesis_encoder,
               initializer=initializer,
               regularizer=regularizer)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(DecomposableAttention, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder
    self._num_labels = vocab.get_vocab_size(namespace="labels")
    check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                           "text field embedding dim", "attend feedforward input dim")
    check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels,
                           "final output dimension", "number of labels")
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
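# Several constructors above wrap their feedforward layers in `TimeDistributed`. A small
# sketch of what that wrapper does, assuming AllenNLP's `TimeDistributed` and the
# `FeedForward` signature used elsewhere in this collection (dimensions are made up):
import torch
from allennlp.modules import FeedForward, TimeDistributed
from allennlp.nn import Activation

feedforward = FeedForward(input_dim=4, num_layers=1, hidden_dims=8,
                          activations=Activation.by_name('relu')())
# TimeDistributed folds the time dimension into the batch dimension, applies the wrapped
# module, then unfolds, so (batch, num_tokens, input_dim) maps to (batch, num_tokens, hidden_dim).
token_wise_feedforward = TimeDistributed(feedforward)
tokens = torch.randn(2, 7, 4)
assert token_wise_feedforward(tokens).shape == (2, 7, 8)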
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             label_namespace: str = "labels",
             constraint_type: str = None,
             feedforward: FeedForward = None,
             include_start_end_transitions: bool = True,
             dropout: float = None,
             verbose_metrics: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self._verbose_metrics = verbose_metrics
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self._feedforward = feedforward
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags))
    if constraint_type is not None:
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(constraint_type, labels)
    else:
        constraints = None
    self.crf = ConditionalRandomField(
            self.num_tags, constraints,
            include_start_end_transitions=include_start_end_transitions
    )
    self.span_metric = SpanBasedF1Measure(vocab,
                                          tag_namespace=label_namespace,
                                          label_encoding=constraint_type or "BIO")
    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    if feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(),
                               "encoder output dim", "feedforward input dim")
    initializer(self)
def __init__(self,
             encoder_output_dim: int,
             action_embedding_dim: int,
             input_attention: Attention,
             num_start_types: int,
             num_entity_types: int,
             mixture_feedforward: FeedForward = None,
             dropout: float = 0.0,
             unlinked_terminal_indices: List[int] = None) -> None:
    super(WikiTablesDecoderStep, self).__init__()
    self._mixture_feedforward = mixture_feedforward
    self._entity_type_embedding = Embedding(num_entity_types, action_embedding_dim)
    self._input_attention = input_attention
    self._num_start_types = num_start_types
    self._start_type_predictor = Linear(encoder_output_dim, num_start_types)

    # Decoder output dim needs to be the same as the encoder output dim since we initialize the
    # hidden state of the decoder with the final hidden state of the encoder.
    output_dim = encoder_output_dim
    input_dim = output_dim
    # Our decoder input will be the concatenation of the decoder hidden state and the previous
    # action embedding, and we'll project that down to the decoder's `input_dim`, which we
    # arbitrarily set to be the same as `output_dim`.
    self._input_projection_layer = Linear(output_dim + action_embedding_dim, input_dim)
    # Before making a prediction, we'll compute an attention over the input given our updated
    # hidden state. Then we concatenate those with the decoder state and project to
    # `action_embedding_dim` to make a prediction.
    self._output_projection_layer = Linear(output_dim + encoder_output_dim, action_embedding_dim)
    if unlinked_terminal_indices is not None:
        # This means we are using coverage to train the parser.
        # These factors are used to add the embeddings of yet to be produced actions to the
        # predicted embedding, and to boost the action logits of yet to be produced linked
        # actions, respectively.
        self._unlinked_checklist_multiplier = Parameter(torch.FloatTensor([1.0]))
        self._linked_checklist_multiplier = Parameter(torch.FloatTensor([1.0]))
    self._unlinked_terminal_indices = unlinked_terminal_indices

    # TODO(pradeep): Do not hardcode decoder cell type.
    self._decoder_cell = LSTMCell(input_dim, output_dim)

    if mixture_feedforward is not None:
        check_dimensions_match(output_dim, mixture_feedforward.get_input_dim(),
                               "hidden state embedding dim", "mixture feedforward input dim")
        check_dimensions_match(mixture_feedforward.get_output_dim(), 1,
                               "mixture feedforward output dim", "dimension for scalar value")

    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             span_extractor: SpanExtractor,
             encoder: Seq2SeqEncoder,
             feedforward_layer: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             evalb_directory_path: str = None) -> None:
    super(SpanConstituencyParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.span_extractor = span_extractor
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    self.feedforward_layer = TimeDistributed(feedforward_layer) if feedforward_layer else None
    self.pos_tag_embedding = pos_tag_embedding or None
    if feedforward_layer is not None:
        output_dim = feedforward_layer.get_output_dim()
    else:
        output_dim = span_extractor.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_classes))

    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "representation dim (tokens + optional POS tags)", "encoder input dim")
    check_dimensions_match(encoder.get_output_dim(), span_extractor.get_input_dim(),
                           "encoder output dim", "span extractor input dim")
    if feedforward_layer is not None:
        check_dimensions_match(span_extractor.get_output_dim(), feedforward_layer.get_input_dim(),
                               "span extractor output dim", "feedforward input dim")
    self.tag_accuracy = CategoricalAccuracy()
    if evalb_directory_path is not None:
        self._evalb_score = EvalbBracketingScorer(evalb_directory_path)
    else:
        self._evalb_score = None
    initializer(self)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'SpanConstituencyParser':
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)
    span_extractor = SpanExtractor.from_params(params.pop("span_extractor"))
    encoder = Seq2SeqEncoder.from_params(params.pop("encoder"))
    feed_forward_params = params.pop("feedforward", None)
    if feed_forward_params is not None:
        feedforward_layer = FeedForward.from_params(feed_forward_params)
    else:
        feedforward_layer = None
    pos_tag_embedding_params = params.pop("pos_tag_embedding", None)
    if pos_tag_embedding_params is not None:
        pos_tag_embedding = Embedding.from_params(vocab, pos_tag_embedding_params)
    else:
        pos_tag_embedding = None
    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
    evalb_directory_path = params.pop("evalb_directory_path", None)
    params.assert_empty(cls.__name__)
    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               span_extractor=span_extractor,
               encoder=encoder,
               feedforward_layer=feedforward_layer,
               pos_tag_embedding=pos_tag_embedding,
               initializer=initializer,
               regularizer=regularizer,
               evalb_directory_path=evalb_directory_path)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             context_layer: Seq2SeqEncoder,
             mention_feedforward: FeedForward,
             antecedent_feedforward: FeedForward,
             feature_size: int,
             max_span_width: int,
             spans_per_word: float,
             max_antecedents: int,
             lexical_dropout: float = 0.2,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(CoreferenceResolver, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)
    feedforward_scorer = torch.nn.Sequential(
            TimeDistributed(mention_feedforward),
            TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)))
    self._mention_pruner = SpanPruner(feedforward_scorer)
    self._antecedent_scorer = TimeDistributed(torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1))
    self._endpoint_span_extractor = EndpointSpanExtractor(context_layer.get_output_dim(),
                                                          combination="x,y",
                                                          num_width_embeddings=max_span_width,
                                                          span_width_embedding_dim=feature_size,
                                                          bucket_widths=False)
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(input_dim=text_field_embedder.get_output_dim())
    # 10 possible distance buckets.
    self._num_distance_buckets = 10
    self._distance_embedding = Embedding(self._num_distance_buckets, feature_size)
    self._max_span_width = max_span_width
    self._spans_per_word = spans_per_word
    self._max_antecedents = max_antecedents
    self._mention_recall = MentionRecall()
    self._conll_coref_scores = ConllCorefScores()
    if lexical_dropout > 0:
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
    else:
        self._lexical_dropout = lambda x: x
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             projection_feedforward: FeedForward,
             inference_encoder: Seq2SeqEncoder,
             output_feedforward: FeedForward,
             output_logit: FeedForward,
             dropout: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._encoder = encoder
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._projection_feedforward = projection_feedforward
    self._inference_encoder = inference_encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
        self.rnn_input_dropout = InputVariationalDropout(dropout)
    else:
        self.dropout = None
        self.rnn_input_dropout = None
    self._output_feedforward = output_feedforward
    self._output_logit = output_logit
    self._num_labels = vocab.get_vocab_size(namespace="labels")
    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(),
                           "encoder output dim", "projection feedforward input")
    check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(),
                           "proj feedforward output dim", "inference lstm input dim")
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'EtdBCN':
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab=vocab, params=embedder_params)
    title_text_encoder = Seq2SeqEncoder.from_params(params.pop("title_text_encoder"))
    abstract_text_encoder = Seq2SeqEncoder.from_params(params.pop("abstract_text_encoder"))
    title_text_projection = FeedForward.from_params(params.pop("title_text_projection"))
    abstract_text_projection = FeedForward.from_params(params.pop("abstract_text_projection"))
    bi_attention_encoder = BiAttentionEncoder.from_params(params.pop("attention_encoder"))
    classifier_feedforward = params.pop("classifier_feedforward")
    if classifier_feedforward.pop('type') == 'feedforward':
        classifier_feedforward = FeedForward.from_params(classifier_feedforward)
    else:
        classifier_feedforward = Maxout.from_params(classifier_feedforward)
    use_positional_encoding = params.pop("use_positional_encoding", False)
    bce_pos_weight = params.pop_int("bce_pos_weight", 10)
    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               title_text_encoder=title_text_encoder,
               abstract_text_encoder=abstract_text_encoder,
               title_text_projection=title_text_projection,
               abstract_text_projection=abstract_text_projection,
               bi_attention_encoder=bi_attention_encoder,
               classifier_feedforward=classifier_feedforward,
               bce_pos_weight=bce_pos_weight,
               use_positional_encoding=use_positional_encoding,
               initializer=initializer,
               regularizer=regularizer)
def __init__(self,
             vocab: Vocabulary,
             bert: TextFieldEmbedder,
             classifier: FeedForward,
             weighted_training: bool = False,
             dropout: float = 0.1,
             num_labels: int = None,
             metrics: List[str] = ['acc'],
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._bert = bert
    self._dropout = torch.nn.Dropout(dropout)
    self._classifier = classifier
    if num_labels is None:
        self._num_labels = vocab.get_vocab_size(namespace="labels")
    else:
        self._num_labels = num_labels
    self._pooler = FeedForward(input_dim=bert.get_output_dim(),
                               num_layers=1,
                               hidden_dims=bert.get_output_dim(),
                               activations=torch.tanh)
    check_dimensions_match(bert.get_output_dim(), classifier.get_input_dim(),
                           "bert output dim", "classifier input dim")
    check_dimensions_match(classifier.get_output_dim(), self._num_labels,
                           "classifier output dim", "number of labels")
    self.metrics = metrics
    self._accuracy = CategoricalAccuracy()
    if 'f1' in self.metrics:
        self._f1 = F1Measure(positive_label=1)
    self.weighted_training = weighted_training
    if not weighted_training:
        self._loss = torch.nn.CrossEntropyLoss()
    else:
        # Keep per-example losses so they can be reweighted during training.
        self._loss = torch.nn.CrossEntropyLoss(reduction='none')
    initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, attend_feedforward: FeedForward, similarity_function: SimilarityFunction, compare_feedforward: FeedForward, aggregate_feedforward: FeedForward, premise_encoder: Optional[Seq2SeqEncoder] = None, hypothesis_encoder: Optional[Seq2SeqEncoder] = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(DecomposableAttention, self).__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._attend_feedforward = TimeDistributed(attend_feedforward) self._matrix_attention = MatrixAttention(similarity_function) self._compare_feedforward = TimeDistributed(compare_feedforward) self._aggregate_feedforward = aggregate_feedforward self._premise_encoder = premise_encoder self._hypothesis_encoder = hypothesis_encoder or premise_encoder self._num_labels = vocab.get_vocab_size(namespace="labels") if text_field_embedder.get_output_dim( ) != attend_feedforward.get_input_dim(): raise ConfigurationError( "Output dimension of the text_field_embedder (dim: {}), " "must match the input_dim of the FeedForward layer " "attend_feedforward, (dim: {}). ".format( text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim())) if aggregate_feedforward.get_output_dim() != self._num_labels: raise ConfigurationError( "Final output dimension (%d) must equal num labels (%d)" % (aggregate_feedforward.get_output_dim(), self._num_labels)) self._accuracy = CategoricalAccuracy() self._loss = torch.nn.CrossEntropyLoss() initializer(self)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'DecomposableAttention': embedder_params = params.pop("text_field_embedder") text_field_embedder = TextFieldEmbedder.from_params( vocab, embedder_params) premise_encoder_params = params.pop("premise_encoder", None) if premise_encoder_params is not None: premise_encoder = Seq2SeqEncoder.from_params( premise_encoder_params) else: premise_encoder = None hypothesis_encoder_params = params.pop("hypothesis_encoder", None) if hypothesis_encoder_params is not None: hypothesis_encoder = Seq2SeqEncoder.from_params( hypothesis_encoder_params) else: hypothesis_encoder = None attend_feedforward = FeedForward.from_params( params.pop('attend_feedforward')) similarity_function = SimilarityFunction.from_params( params.pop("similarity_function")) compare_feedforward = FeedForward.from_params( params.pop('compare_feedforward')) aggregate_feedforward = FeedForward.from_params( params.pop('aggregate_feedforward')) initializer = InitializerApplicator.from_params( params.pop("initializer", [])) return cls(vocab=vocab, text_field_embedder=text_field_embedder, attend_feedforward=attend_feedforward, similarity_function=similarity_function, compare_feedforward=compare_feedforward, aggregate_feedforward=aggregate_feedforward, initializer=initializer, premise_encoder=premise_encoder, hypothesis_encoder=hypothesis_encoder)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             preload_path: Optional[str] = None) -> None:
    super(DecomposableAttention, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = MatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder
    # self._num_labels = vocab.get_vocab_size(namespace="labels")

    check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                           "text field embedding dim", "attend feedforward input dim")
    # check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels,
    #                        "final output dimension", "number of labels")
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)

    # Do we want to initialize with the SNLI stuff? let's say yes.
    # 'snli-decomposable-attention/weights.th'
    if preload_path is not None:
        logger.info("Preloading!")
        preload = torch.load(preload_path)
        own_state = self.state_dict()
        for name, param in preload.items():
            if name not in own_state:
                logger.info("Unexpected key {} in state_dict with size {}".format(name, param.size()))
            elif param.size() == own_state[name].size():
                own_state[name].copy_(param)
            else:
                logger.info("Network has {} with size {}, ckpt has {}".format(
                        name, own_state[name].size(), param.size()))
        missing = set(own_state.keys()) - set(preload.keys())
        if len(missing) > 0:
            logger.info("We couldn't find {}".format(','.join(missing)))
def from_params(cls, vocab: Vocabulary, params: Params) -> 'BiattentiveClassificationNetwork':  # type: ignore
    # pylint: disable=arguments-differ
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab=vocab, params=embedder_params)
    embedding_dropout = params.pop("embedding_dropout")
    pre_encode_feedforward = FeedForward.from_params(params.pop("pre_encode_feedforward"))
    encoder = Seq2SeqEncoder.from_params(params.pop("encoder"))
    integrator = Seq2SeqEncoder.from_params(params.pop("integrator"))
    integrator_dropout = params.pop("integrator_dropout")
    output_layer_params = params.pop("output_layer")
    if "activations" in output_layer_params:
        output_layer = FeedForward.from_params(output_layer_params)
    else:
        output_layer = Maxout.from_params(output_layer_params)
    elmo = params.pop("elmo", None)
    if elmo is not None:
        elmo = Elmo.from_params(elmo)
    use_input_elmo = params.pop_bool("use_input_elmo", False)
    use_integrator_output_elmo = params.pop_bool("use_integrator_output_elmo", False)
    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
    params.assert_empty(cls.__name__)
    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               embedding_dropout=embedding_dropout,
               pre_encode_feedforward=pre_encode_feedforward,
               encoder=encoder,
               integrator=integrator,
               integrator_dropout=integrator_dropout,
               output_layer=output_layer,
               elmo=elmo,
               use_input_elmo=use_input_elmo,
               use_integrator_output_elmo=use_integrator_output_elmo,
               initializer=initializer,
               regularizer=regularizer)
def __init__(self,
             vocab: Vocabulary,
             bert: TextFieldEmbedder,
             classifier: FeedForward,
             dropout: float = 0.1,
             pooling: str = 'mean',
             pooler: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.pooling = pooling
    self._bert = bert
    self._classifier = classifier
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    if pooling == 'cat':
        bert_out_dim = bert.get_output_dim() * 2
    else:
        bert_out_dim = bert.get_output_dim()
    self.pooler = pooler
    if pooler:
        self._pooler = FeedForward(input_dim=bert_out_dim,
                                   num_layers=1,
                                   hidden_dims=bert_out_dim,
                                   activations=torch.tanh)
    check_dimensions_match(bert_out_dim, classifier.get_input_dim(),
                           "bert embedding dim", "classifier input dim")
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def from_params(cls, vocab: Vocabulary,
                params: Params) -> 'DialogueContextHierarchicalCoherenceAttentionClassifier':
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)
    utterance_encoder = Seq2VecEncoder.from_params(params.pop("utterance_encoder"))
    attend_feedforward = FeedForward.from_params(params.pop('attend_feedforward'))
    # similarity_function = SimilarityFunction.from_params(params.pop("similarity_function"))
    compare_feedforward = FeedForward.from_params(params.pop('compare_feedforward'))
    classifier_feedforward = FeedForward.from_params(params.pop("classifier_feedforward"))
    final_classifier_feedforward = FeedForward.from_params(params.pop("final_classifier_feedforward"))
    initializer = InitializerApplicator.from_params(params.pop("initializer", []))
    regularizer = RegularizerApplicator.from_params(params.pop("regularizer", []))
    # matrix_attention = MatrixAttention().from_params(params.pop("similarity_function"))
    matrix_attention = MultiHeadSelfAttention.from_params(params.pop("similarity_function"))
    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               attend_feedforward=attend_feedforward,
               matrix_attention=matrix_attention,
               compare_feedforward=compare_feedforward,
               classifier_feedforward=classifier_feedforward,
               final_classifier_feedforward=final_classifier_feedforward,
               utterance_encoder=utterance_encoder,
               initializer=initializer,
               regularizer=regularizer)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             matrix_attention: MatrixAttention,
             compare_feedforward: FeedForward,
             classifier_feedforward: FeedForward,
             final_classifier_feedforward: FeedForward,
             utterance_encoder: Seq2VecEncoder,
             context_encoder: Seq2SeqEncoder,
             response_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(DialogueContextHierarchicalCoherenceAttentionClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = vocab.get_vocab_size("labels")
    self.utterances_encoder = TimeDistributed(utterance_encoder)
    self.context_encoder = context_encoder
    self.response_encoder = response_encoder
    self.attend_feedforward = TimeDistributed(attend_feedforward)
    self.matrix_attention = matrix_attention
    self.compare_feedforward = TimeDistributed(compare_feedforward)
    self.classifier_feedforward = classifier_feedforward
    self.final_classifier_feedforward = final_classifier_feedforward
    labels = self.vocab.get_index_to_token_vocabulary('labels')
    pos_label_index = list(labels.keys())[list(labels.values()).index('neg')]
    check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                           "text field embedding dim", "attend feedforward input dim")
    check_dimensions_match(classifier_feedforward.get_output_dim(), self.num_classes,
                           "final output dimension", "number of labels")
    self.metrics = {
            "accuracy": CategoricalAccuracy()
            # "f1": F1Measure(positive_label=pos_label_index)
    }
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             token_embedder: TokenEmbedder,
             num_labels: int) -> None:
    super().__init__(vocab)
    self._text_field_embedder = BasicTextFieldEmbedder({"tokens": token_embedder})
    dim = token_embedder.get_output_dim()
    self._attend_feedforward = TimeDistributed(FeedForward(dim, 1, 100, torch.nn.ReLU(), 0.2))
    self._matrix_attention = DotProductMatrixAttention()
    self._compare_feedforward = TimeDistributed(FeedForward(dim * 2, 1, 100, torch.nn.ReLU(), 0.2))
    # linear denotes "lambda x: x"
    self._aggregate_feedforward = FeedForward(200, 1, num_labels, PassThrough(), 0.0)
    self._num_labels = num_labels
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
def from_params(cls, vocab: Vocabulary, params: Params) -> 'StackedNNAggregateCustom':
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = BasicTextFieldEmbedder.from_params(vocab, embedder_params)
    embeddings_dropout_value = params.pop("embeddings_dropout", 0.0)
    share_encoders = params.pop("share_encoders", False)

    # premise encoder
    premise_encoder_params = params.pop("premise_encoder", None)
    premise_enc_aggregate = params.pop("premise_encoder_aggregate", "max")
    if premise_encoder_params is not None:
        premise_encoder = Seq2SeqEncoder.from_params(premise_encoder_params)
    else:
        premise_encoder = None

    # hypothesis encoder
    if share_encoders:
        hypothesis_enc_aggregate = premise_enc_aggregate
        hypothesis_encoder = premise_encoder
    else:
        hypothesis_encoder_params = params.pop("hypothesis_encoder", None)
        hypothesis_enc_aggregate = params.pop("hypothesis_encoder_aggregate", "max")
        if hypothesis_encoder_params is not None:
            hypothesis_encoder = Seq2SeqEncoder.from_params(hypothesis_encoder_params)
        else:
            hypothesis_encoder = None

    aggregate_feedforward = FeedForward.from_params(params.pop('aggregate_feedforward'))
    init_params = params.pop('initializer', None)
    initializer = (InitializerApplicator.from_params(init_params)
                   if init_params is not None else InitializerApplicator())
    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               aggregate_feedforward=aggregate_feedforward,
               premise_encoder=premise_encoder,
               hypothesis_encoder=hypothesis_encoder,
               initializer=initializer,
               aggregate_hypothesis=hypothesis_enc_aggregate,
               aggregate_premise=premise_enc_aggregate,
               embeddings_dropout_value=embeddings_dropout_value,
               share_encoders=share_encoders)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             label_smoothing: float = 0.1,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SentimentClassifier, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    share_rnn = nn.LSTM(input_size=self._text_field_embedder.get_output_dim(),
                        hidden_size=150,
                        batch_first=True,
                        # dropout=dropout,
                        bidirectional=True)
    share_encoder = PytorchSeq2SeqWrapper(share_rnn)
    self._encoder = RNNEncoder(vocab, share_encoder, input_dropout, regularizer)
    self._seq_vec = CnnEncoder(self._encoder.get_output_dim(), 25)
    self._de_dim = len(TASKS_NAME)
    weight = torch.empty(self._de_dim, self._text_field_embedder.get_output_dim())
    torch.nn.init.orthogonal_(weight)
    self._domain_embeddings = Embedding(self._de_dim,
                                        self._text_field_embedder.get_output_dim(),
                                        weight=weight)
    self._de_attention = BilinearAttention(self._seq_vec.get_output_dim(),
                                           self._domain_embeddings.get_output_dim())
    self._de_feedforward = FeedForward(self._domain_embeddings.get_output_dim(), 1,
                                       self._seq_vec.get_output_dim(),
                                       Activation.by_name("elu")())
    self._num_classes = self.vocab.get_vocab_size("label")
    self._sentiment_discriminator = Discriminator(self._seq_vec.get_output_dim(), self._num_classes)
    self._s_domain_discriminator = Discriminator(self._seq_vec.get_output_dim(), len(TASKS_NAME))
    self._valid_discriminator = Discriminator(self._domain_embeddings.get_output_dim(), 2)
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._label_smoothing = label_smoothing
    self.metrics = {
            "s_domain_acc": CategoricalAccuracy(),
            "valid_acc": CategoricalAccuracy()
    }
    for task_name in TASKS_NAME:
        self.metrics["{}_stm_acc".format(task_name)] = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    self._domain_loss = torch.nn.CrossEntropyLoss()
    # TODO torch.nn.BCELoss
    self._valid_loss = torch.nn.BCEWithLogitsLoss()
    initializer(self)
def __init__(
    self,
    encoder_output_dim: int,
    action_embedding_dim: int,
    input_attention: Attention,
    input_attention_activation: Activation = None,
    activation: Activation = Activation.by_name("relu")(),
    predict_start_type_separately: bool = True,
    num_start_types: int = None,
    add_action_bias: bool = True,
    mixture_feedforward: FeedForward = None,
    dropout: float = 0.0,
    num_layers: int = 1,
) -> None:
    super().__init__(
        encoder_output_dim=encoder_output_dim,
        action_embedding_dim=action_embedding_dim,
        input_attention=input_attention,
        input_attention_activation=input_attention_activation,
        num_start_types=num_start_types,
        activation=activation,
        predict_start_type_separately=predict_start_type_separately,
        add_action_bias=add_action_bias,
        dropout=dropout,
        num_layers=num_layers,
    )
    self._mixture_feedforward = mixture_feedforward
    if mixture_feedforward is not None:
        check_dimensions_match(
            encoder_output_dim,
            mixture_feedforward.get_input_dim(),
            "hidden state embedding dim",
            "mixture feedforward input dim",
        )
        check_dimensions_match(mixture_feedforward.get_output_dim(), 1,
                               "mixture feedforward output dim", "dimension for scalar value")
def __init__(self,
             vocab: Vocabulary,
             bert: TextFieldEmbedder,
             classifier: FeedForward,
             model_type: str = None,  # None, 'first', 'reinforce'
             dropout: float = 0.1,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.model_type = model_type
    self._bert = bert
    self._classifier = classifier
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self._pooler = FeedForward(input_dim=bert.get_output_dim(),
                               num_layers=1,
                               hidden_dims=bert.get_output_dim(),
                               activations=torch.tanh)
    if model_type is None:
        bert_out_dim = bert.get_output_dim() * 2
    else:
        bert_out_dim = bert.get_output_dim() * 3
    check_dimensions_match(bert_out_dim, classifier.get_input_dim(),
                           "bert embedding dim", "classifier input dim")
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             dim_reduce_layer: FeedForward = None,
             separate: bool = False,
             repr_layer: FeedForward = None,
             pair: bool = True,
             combine: str = 'concat',
             dist_emb_size: int = None) -> None:
    super(SpanPairPairedLayer, self).__init__()
    self.inp_dim, self.out_dim = None, None
    self.pair = pair
    self.combine = combine
    assert combine in {'concat', 'coref'}  # 'coref' means using concat + dot + width
    if combine == 'coref':
        self.num_distance_buckets = 10
        self.distance_embedding = Embedding(self.num_distance_buckets, dist_emb_size)

    self.dim_reduce_layer1 = self.dim_reduce_layer2 = dim_reduce_layer
    if dim_reduce_layer is not None:
        self.inp_dim = self.inp_dim or dim_reduce_layer.get_input_dim()
        self.out_dim = dim_reduce_layer.get_output_dim()
        self.dim_reduce_layer1 = TimeDistributed(dim_reduce_layer)
        if separate:
            self.dim_reduce_layer2 = copy.deepcopy(self.dim_reduce_layer1)
        else:
            self.dim_reduce_layer2 = self.dim_reduce_layer1
        if pair:
            self.out_dim *= 2

    self.repr_layer = None
    if repr_layer is not None:
        if not pair:
            raise Exception('MLP needs paired input')
        self.inp_dim = self.inp_dim or repr_layer.get_input_dim()
        self.out_dim = repr_layer.get_output_dim()
        self.repr_layer = TimeDistributed(repr_layer)
def test_forward_gives_correct_output(self): params = Params({"input_dim": 2, "hidden_dims": 3, "activations": "relu", "num_layers": 2}) feedforward = FeedForward.from_params(params) constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.0})) initializer = InitializerApplicator([(".*", constant_init)]) initializer(feedforward) input_tensor = torch.FloatTensor([[-3, 1]]) output = feedforward(input_tensor).data.numpy() assert output.shape == (1, 3) # This output was checked by hand - ReLU makes output after first hidden layer [0, 0, 0], # which then gets a bias added in the second layer to be [1, 1, 1]. assert_almost_equal(output, [[1, 1, 1]])
def from_params(cls, vocab: Vocabulary, params: Params) -> 'BigramEmbedderDimwiseWeightedSum':
    mapping_layer_params = params.pop("mapping_layer")
    mapping_layer = FeedForward.from_params(mapping_layer_params)
    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
    return cls(vocab=vocab,
               mapping_layer=mapping_layer,
               initializer=initializer,
               regularizer=regularizer)
def from_params(cls, vocab, params: Params) -> 'WikiTablesErmSemanticParser':
    question_embedder = TextFieldEmbedder.from_params(vocab, params.pop("question_embedder"))
    action_embedding_dim = params.pop_int("action_embedding_dim")
    encoder = Seq2SeqEncoder.from_params(params.pop("encoder"))
    entity_encoder = Seq2VecEncoder.from_params(params.pop('entity_encoder'))
    mixture_feedforward_type = params.pop('mixture_feedforward', None)
    if mixture_feedforward_type is not None:
        mixture_feedforward = FeedForward.from_params(mixture_feedforward_type)
    else:
        mixture_feedforward = None
    input_attention = Attention.from_params(params.pop("attention"))
    decoder_beam_size = params.pop_int("decoder_beam_size")
    decoder_num_finished_states = params.pop_int("decoder_num_finished_states", None)
    max_decoding_steps = params.pop_int("max_decoding_steps")
    normalize_beam_score_by_length = params.pop("normalize_beam_score_by_length", False)
    use_neighbor_similarity_for_linking = params.pop_bool("use_neighbor_similarity_for_linking", False)
    dropout = params.pop_float('dropout', 0.0)
    num_linking_features = params.pop_int('num_linking_features', 10)
    tables_directory = params.pop('tables_directory', '/wikitables/')
    rule_namespace = params.pop('rule_namespace', 'rule_labels')
    checklist_cost_weight = params.pop_float("checklist_cost_weight", 0.6)
    mml_model_file = params.pop('mml_model_file', None)
    params.assert_empty(cls.__name__)
    return cls(vocab,
               question_embedder=question_embedder,
               action_embedding_dim=action_embedding_dim,
               encoder=encoder,
               entity_encoder=entity_encoder,
               mixture_feedforward=mixture_feedforward,
               input_attention=input_attention,
               decoder_beam_size=decoder_beam_size,
               decoder_num_finished_states=decoder_num_finished_states,
               max_decoding_steps=max_decoding_steps,
               normalize_beam_score_by_length=normalize_beam_score_by_length,
               checklist_cost_weight=checklist_cost_weight,
               use_neighbor_similarity_for_linking=use_neighbor_similarity_for_linking,
               dropout=dropout,
               num_linking_features=num_linking_features,
               tables_directory=tables_directory,
               rule_namespace=rule_namespace,
               initial_mml_model_file=mml_model_file)
def __init__(self,
             tensor_1_dim: int,
             tensor_2_dim: int,
             combination: str = 'x,y',
             feedforward_params=None) -> None:
    super(LinearExtendedFeedForwardReprCombination, self).__init__()
    self._combination = combination
    # aggregate knowledge input state is inferred automatically
    combined_dim = get_combined_dim(combination, [tensor_1_dim, tensor_2_dim])
    update_params(feedforward_params, {"input_dim": combined_dim})
    self._feedforward_layer = FeedForward.from_params(feedforward_params)
def __init__(self,
             dim_reduce_layer: FeedForward = None,
             separate: bool = False,
             repr_layer: FeedForward = None) -> None:
    super(SpanPairLayer, self).__init__()
    self.inp_dim, self.out_dim = None, None

    self.dim_reduce_layer1 = self.dim_reduce_layer2 = dim_reduce_layer
    if dim_reduce_layer is not None:
        self.inp_dim = self.inp_dim or dim_reduce_layer.get_input_dim()
        self.out_dim = dim_reduce_layer.get_output_dim()
        self.dim_reduce_layer1 = TimeDistributed(dim_reduce_layer)
        if separate:
            self.dim_reduce_layer2 = copy.deepcopy(self.dim_reduce_layer1)
        else:
            self.dim_reduce_layer2 = self.dim_reduce_layer1

    self.repr_layer = None
    if repr_layer is not None:
        self.inp_dim = self.inp_dim or repr_layer.get_input_dim()
        self.out_dim = repr_layer.get_output_dim()
        self.repr_layer = TimeDistributed(repr_layer)
def from_params(cls, vocab: Vocabulary, params: Params) -> "CoreferenceResolver": embedder_params = params.pop("text_field_embedder") text_field_embedder = TextFieldEmbedder.from_params( vocab, embedder_params) context_layer = Seq2SeqEncoder.from_params(params.pop("context_layer")) mention_feedforward = FeedForward.from_params( params.pop("mention_feedforward")) antecedent_feedforward = FeedForward.from_params( params.pop("antecedent_feedforward")) feature_size = params.pop("feature_size") max_span_width = params.pop("max_span_width") spans_per_word = params.pop("spans_per_word") max_antecedents = params.pop("max_antecedents") lexical_dropout = params.pop("lexical_dropout", 0.2) init_params = params.pop("initializer", None) reg_params = params.pop("regularizer", None) initializer = (InitializerApplicator.from_params(init_params) if init_params is not None else InitializerApplicator()) regularizer = RegularizerApplicator.from_params( reg_params) if reg_params is not None else None params.assert_empty(cls.__name__) return cls(vocab=vocab, text_field_embedder=text_field_embedder, context_layer=context_layer, mention_feedforward=mention_feedforward, antecedent_feedforward=antecedent_feedforward, feature_size=feature_size, max_span_width=max_span_width, spans_per_word=spans_per_word, max_antecedents=max_antecedents, lexical_dropout=lexical_dropout, initializer=initializer, regularizer=regularizer)
def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             dropout_p: float,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    self.word_embeddings = word_embeddings
    self.embedding2input = FeedForward(input_dim=word_embeddings.get_output_dim(),
                                       num_layers=1,
                                       hidden_dims=encoder.get_input_dim(),
                                       activations=Activation.by_name('relu')(),
                                       dropout=dropout_p)
    self.encoder = encoder
    self.hidden2intermediate = FeedForward(input_dim=encoder.get_output_dim(),
                                           num_layers=1,
                                           hidden_dims=int(encoder.get_output_dim() / 2),
                                           activations=Activation.by_name('relu')(),
                                           dropout=dropout_p)
    self.intermediate2tag = nn.Linear(in_features=int(encoder.get_output_dim() / 2),
                                      out_features=vocab.get_vocab_size('labels'))
    # self.accuracy = CategoricalAccuracy()
    label_vocab = vocab.get_token_to_index_vocabulary('labels').copy()
    # print("label_vocab: ", label_vocab)
    [label_vocab.pop(x) for x in ['O', 'OR']]
    labels_for_metric = list(label_vocab.values())
    # print("labels_for_metric: ", labels_for_metric)
    self.accuracy = CustomFBetaMeasure(beta=1.0,
                                       average='micro',
                                       labels=labels_for_metric)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             output_logit: FeedForward,
             dropout: float = 0.1,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._output_logit = output_logit
    self._encoder = encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    if encoder:
        check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        check_dimensions_match(encoder.get_output_dim(), output_logit.get_input_dim(),
                               "encoder output dim", "output_logit input dim")
    else:
        check_dimensions_match(text_field_embedder.get_output_dim(), output_logit.get_input_dim(),
                               "text field embedding dim", "output_logit input dim")
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def test_can_construct_from_params(self): params = Params({ "input_dim": 2, "hidden_dims": 3, "activations": "relu", "num_layers": 2 }) feedforward = FeedForward.from_params(params) assert len(feedforward._activations) == 2 assert [isinstance(a, torch.nn.ReLU) for a in feedforward._activations] assert len(feedforward._linear_layers) == 2 assert [ layer.weight.size(-1) == 3 for layer in feedforward._linear_layers ] params = Params({ "input_dim": 2, "hidden_dims": [3, 4, 5], "activations": ["relu", "relu", "linear"], "dropout": 0.2, "num_layers": 3, }) feedforward = FeedForward.from_params(params) assert len(feedforward._activations) == 3 assert isinstance(feedforward._activations[0], torch.nn.ReLU) assert isinstance(feedforward._activations[1], torch.nn.ReLU) # It's hard to check that the last activation is the lambda function we use for `linear`, # so this is good enough. assert not isinstance(feedforward._activations[2], torch.nn.ReLU) assert len(feedforward._linear_layers) == 3 assert feedforward._linear_layers[0].weight.size(0) == 3 assert feedforward._linear_layers[1].weight.size(0) == 4 assert feedforward._linear_layers[2].weight.size(0) == 5 assert len(feedforward._dropout) == 3 assert [d.p == 0.2 for d in feedforward._dropout]
def from_params(cls, vocab: Vocabulary, params: Params) -> 'SarcasmClassifier':
    embedder_params1 = params.pop("text_field_embedder")
    embedder_params2 = params.pop("elmo_text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(embedder_params1, vocab=vocab)
    elmo_text_field_embedder = TextFieldEmbedder.from_params(embedder_params2, vocab=vocab)
    quote_response_encoder = Seq2SeqEncoder.from_params(params.pop("quote_response_encoder"))
    quote_response_encoder_aux = Seq2VecEncoder.from_params(params.pop("quote_response_encoder_aux"))
    classifier_feedforward = FeedForward.from_params(params.pop("classifier_feedforward"))
    classifier_feedforward_2 = FeedForward.from_params(params.pop("classifier_feedforward_2"))
    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
    report_auxiliary_metrics = params.pop_bool("report_auxiliary_metrics", False)
    # predict_mode = params.pop_bool("predict_mode", False)
    # print(f"pred mode: {predict_mode}")
    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               elmo_text_field_embedder=elmo_text_field_embedder,
               quote_response_encoder=quote_response_encoder,
               quote_response_encoder_aux=quote_response_encoder_aux,
               classifier_feedforward=classifier_feedforward,
               classifier_feedforward_2=classifier_feedforward_2,
               initializer=initializer,
               regularizer=regularizer,
               report_auxiliary_metrics=report_auxiliary_metrics)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'BiattentiveClassificationNetwork': embedder_params = params.pop("text_field_embedder") text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params) embedding_dropout = params.pop("embedding_dropout") pre_encode_feedforward = FeedForward.from_params(params.pop("pre_encode_feedforward")) # encoder = Seq2SeqEncoder.from_params(params.pop("encoder")) # small_encoder = Seq2SeqEncoder.from_params(params.pop("small_encoder")) big_dim = params.pop("big_dim") small_dim = params.pop("small_dim") gamma = params.pop("gamma") integrator = Seq2SeqEncoder.from_params(params.pop("integrator")) integrator_dropout = params.pop("integrator_dropout") output_layer_params = params.pop("output_layer") if "activations" in output_layer_params: output_layer = FeedForward.from_params(output_layer_params) else: output_layer = Maxout.from_params(output_layer_params) initializer = InitializerApplicator.from_params(params.pop('initializer', [])) regularizer = RegularizerApplicator.from_params(params.pop('regularizer', [])) return cls(vocab=vocab, text_field_embedder=text_field_embedder, embedding_dropout=embedding_dropout, pre_encode_feedforward=pre_encode_feedforward, big_dim=big_dim, small_dim=small_dim, # encoder=encoder, # small_encoder=small_encoder, gamma=gamma, integrator=integrator, integrator_dropout=integrator_dropout, output_layer=output_layer, initializer=initializer, regularizer=regularizer)
def __init__(
    self,
    input_dim: int,
    num_heads: int = 8,
    attention_dim: Optional[int] = None,
    value_dim: Optional[int] = None,
    feedforward_hidden_dim: int = None,
    residual_dropout: float = 0.1,
    attention_dropout: float = 0.1,
    feedforward_dropout: float = 0.1,
    use_vanilla_wiring: bool = False,
):
    super(UTDecBlock, self).__init__()
    hidden_dim = input_dim
    attention_dim = attention_dim or (hidden_dim // num_heads)
    value_dim = value_dim or (hidden_dim // num_heads)
    feedforward_hidden_dim = feedforward_hidden_dim or hidden_dim
    self._masked_attention = MaskedMultiHeadSelfAttention(num_heads,
                                                          hidden_dim,
                                                          attention_dim * num_heads,
                                                          value_dim * num_heads,
                                                          attention_dropout=attention_dropout)
    self._masked_attention_norm = LayerNorm(hidden_dim)
    self._attention = MultiHeadAttention(num_heads,
                                         hidden_dim,
                                         hidden_dim,
                                         attention_dim * num_heads,
                                         value_dim * num_heads,
                                         attention_dropout=attention_dropout)
    self._dropout = torch.nn.Dropout(residual_dropout)
    self._attention_norm = LayerNorm(hidden_dim)
    # use feedforward net as transition function
    self._feedforward = FeedForward(hidden_dim,
                                    num_layers=2,
                                    hidden_dims=[feedforward_hidden_dim, hidden_dim],
                                    activations=[Activation.by_name('relu')(),
                                                 Activation.by_name('linear')()],
                                    dropout=feedforward_dropout)
    self._feedforward_norm = LayerNorm(hidden_dim)
    self._use_vanilla_wiring = use_vanilla_wiring
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             pos_tag_embedding: Embedding = None,
             users_embedding: Embedding = None,
             dropout: float = 0.1,
             label_namespace: str = "labels",
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: RegularizerApplicator = None) -> None:
    super().__init__(vocab, regularizer)
    self._label_namespace = label_namespace
    self._dropout = Dropout(dropout)
    self._text_field_embedder = text_field_embedder
    self._pos_tag_embedding = pos_tag_embedding or None
    representation_dim = self._text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += self._pos_tag_embedding.get_output_dim()
    self._report_cnn = CnnEncoder(representation_dim, 25)
    self._comment_cnn = CnnEncoder(representation_dim, 25)
    lstm_input_dim = self._comment_cnn.get_output_dim()
    self._user_embedding = users_embedding or None
    if users_embedding is not None:
        lstm_input_dim += self._user_embedding.get_output_dim()
    rnn = nn.LSTM(input_size=lstm_input_dim,
                  hidden_size=150,
                  batch_first=True,
                  bidirectional=True)
    self._encoder = PytorchSeq2SeqWrapper(rnn)
    self._seq2vec = CnnEncoder(self._encoder.get_output_dim(), 25)
    self._num_class = self.vocab.get_vocab_size(self._label_namespace)
    self._bilinear_sim = BilinearSimilarity(self._encoder.get_output_dim(),
                                            self._encoder.get_output_dim())
    self._projector = FeedForward(self._seq2vec.get_output_dim(), 2,
                                  [50, self._num_class],
                                  Activation.by_name("sigmoid")(),
                                  dropout)
    self._golden_instances = None
    self._golden_instances_labels = None
    self._golden_instances_id = None
    self._metrics = {
            "accuracy": CategoricalAccuracy(),
            "f-measure": F1Measure(positive_label=vocab.get_token_index("feature", "labels")),
    }
    self._loss = torch.nn.CrossEntropyLoss()
    self._contrastive_loss = ContrastiveLoss()
    self._mse_loss = torch.nn.MSELoss()
    initializer(self)
def from_params(cls, vocab, params: Params) -> 'WikiTablesMmlSemanticParser':
    question_embedder = TextFieldEmbedder.from_params(vocab, params.pop("question_embedder"))
    action_embedding_dim = params.pop_int("action_embedding_dim")
    encoder = Seq2SeqEncoder.from_params(params.pop("encoder"))
    entity_encoder = Seq2VecEncoder.from_params(params.pop('entity_encoder'))
    max_decoding_steps = params.pop_int("max_decoding_steps")
    mixture_feedforward_type = params.pop('mixture_feedforward', None)
    if mixture_feedforward_type is not None:
        mixture_feedforward = FeedForward.from_params(mixture_feedforward_type)
    else:
        mixture_feedforward = None
    decoder_beam_search = BeamSearch.from_params(params.pop("decoder_beam_search"))
    # If no attention function is specified, we should not use attention, not attention with
    # default similarity function.
    attention_function_type = params.pop("attention_function", None)
    if attention_function_type is not None:
        attention_function = SimilarityFunction.from_params(attention_function_type)
    else:
        attention_function = None
    training_beam_size = params.pop_int('training_beam_size', None)
    use_neighbor_similarity_for_linking = params.pop_bool('use_neighbor_similarity_for_linking', False)
    dropout = params.pop_float('dropout', 0.0)
    num_linking_features = params.pop_int('num_linking_features', 10)
    tables_directory = params.pop('tables_directory', '/wikitables/')
    rule_namespace = params.pop('rule_namespace', 'rule_labels')
    params.assert_empty(cls.__name__)
    return cls(vocab,
               question_embedder=question_embedder,
               action_embedding_dim=action_embedding_dim,
               encoder=encoder,
               entity_encoder=entity_encoder,
               mixture_feedforward=mixture_feedforward,
               decoder_beam_search=decoder_beam_search,
               max_decoding_steps=max_decoding_steps,
               attention_function=attention_function,
               training_beam_size=training_beam_size,
               use_neighbor_similarity_for_linking=use_neighbor_similarity_for_linking,
               dropout=dropout,
               num_linking_features=num_linking_features,
               tables_directory=tables_directory,
               rule_namespace=rule_namespace)
def __init__(self,
             vocab,
             encoder: FeedForward,
             mean_projection: FeedForward,
             log_variance_projection: FeedForward,
             decoder: FeedForward,
             apply_batchnorm: bool = False,
             z_dropout: float = 0.2) -> None:
    super(LogisticNormal, self).__init__(vocab)
    self.encoder = encoder
    self.mean_projection = mean_projection
    self.log_variance_projection = log_variance_projection
    self._decoder = torch.nn.Linear(decoder.get_input_dim(), decoder.get_output_dim(), bias=False)
    self._z_dropout = torch.nn.Dropout(z_dropout)
    self.latent_dim = mean_projection.get_output_dim()

    # If specified, set up batchnorm for both the mean and the log variance.
    self._apply_batchnorm = apply_batchnorm
    if apply_batchnorm:
        self.mean_bn = torch.nn.BatchNorm1d(self.latent_dim, eps=0.001, momentum=0.001, affine=True)
        self.mean_bn.weight.data.copy_(torch.ones(self.latent_dim))
        self.mean_bn.weight.requires_grad = False

        self.log_var_bn = torch.nn.BatchNorm1d(self.latent_dim, eps=0.001, momentum=0.001, affine=True)
        self.log_var_bn.weight.data.copy_(torch.ones(self.latent_dim))
        self.log_var_bn.weight.requires_grad = False
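# The `LogisticNormal` constructor above only wires up the mean / log-variance projections.
# As a hedged illustration (not code taken from that repository), the standard
# reparameterization trick that such projections typically feed looks like this:
import torch

def reparameterize(mean: torch.Tensor, log_variance: torch.Tensor) -> torch.Tensor:
    # z = mu + sigma * eps, with eps ~ N(0, I) and sigma = exp(0.5 * log_variance),
    # which keeps the sampling step differentiable with respect to mean and log_variance.
    epsilon = torch.randn_like(mean)
    return mean + torch.exp(0.5 * log_variance) * epsilon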
def test_forward_gives_correct_output(self):
    params = Params({
            'input_dim': 2,
            'hidden_dims': 3,
            'activations': 'relu',
            'num_layers': 2
    })
    feedforward = FeedForward.from_params(params)
    constant_init = lambda tensor: torch.nn.init.constant_(tensor, 1.)
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(feedforward)
    input_tensor = torch.FloatTensor([[-3, 1]])
    output = feedforward(input_tensor).data.numpy()
    assert output.shape == (1, 3)
    # This output was checked by hand - ReLU makes output after first hidden layer [0, 0, 0],
    # which then gets a bias added in the second layer to be [1, 1, 1].
    assert_almost_equal(output, [[1, 1, 1]])