def __init__(self, word_embeddings: TextFieldEmbedder, encoder: Seq2SeqEncoder, vocab: Vocabulary) -> None:
    """Store the embedder/encoder, set up per-label average vectors
    (initialised to -inf) and a categorical-accuracy metric.
    """
    super().__init__(vocab)
    self.word_embeddings = word_embeddings
    self.encoder = encoder
    self.vocab = vocab
    self.label_vocab = vocab.get_index_to_token_vocabulary(
        namespace='labels')
    # One slot per label, every component -inf, so an untouched class
    # average is clearly distinguishable from a computed one.
    template = torch.Tensor([float('-inf')] * encoder.get_input_dim())
    self.class_avgs = [template.clone() for _ in self.label_vocab]
    self.accuracy = CategoricalAccuracy()
    # Flip to True locally to dump embedder/vocab details at construction.
    self.debug = False
    if self.debug:
        print("===MODEL DEBUG===")
        print(
            "Number of embeddings:",
            self.word_embeddings._token_embedders['tokens'].num_embeddings)
        print("vocab:", vocab)
        print("===MODEL DEBUG===")
def __init__(
    self,
    encoder: Seq2SeqEncoder,
    projection: bool = True,
) -> None:
    """Wrap ``encoder``; optionally add a time-distributed linear layer that
    maps the encoder's output dimension back to its input dimension.

    When ``projection`` is False the encoder itself must already have
    matching input/output dimensions (checked eagerly).
    """
    super().__init__(stateful=encoder.stateful)
    self._input_dim = encoder.get_input_dim()
    self._encoder = encoder
    self._projection: Optional[torch.nn.Module] = None
    if not projection:
        # No projection: output must already line up with the input.
        check_dimensions_match(
            self._encoder.get_input_dim(),
            self._encoder.get_output_dim(),
            "encoder input dim",
            "encoder output dim",
        )
    else:
        out_to_in = torch.nn.Linear(
            encoder.get_output_dim(),
            encoder.get_input_dim(),
        )
        self._projection = TimeDistributed(out_to_in)  # type: ignore
def __init__(
    self,
    vocab: Vocabulary,
    embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    emb_to_enc_proj: Optional[FeedForward] = None,
    feedforward: Optional[FeedForward] = None,
    dropout: float = 0.0,
    num_tags: int = 2,
    use_crf: bool = False,
):
    """Sequence tagger: embed -> (optional projection) -> encode ->
    (optional feedforward) -> linear tag scores, optionally decoded with a CRF.

    Raises:
        ConfigurationError: if the configured layer dimensions do not line up.
        (Previously these were ``assert`` statements, which are silently
        stripped under ``python -O``.)
    """
    super().__init__(vocab)
    self.embedder = embedder
    # Optional bridge from the embedder's output size to the encoder's input size.
    self.emb_to_enc_proj = emb_to_enc_proj
    self.encoder = encoder
    # Either the embedder feeds the encoder directly, or the projection does.
    dims_match = (
        embedder.get_output_dim() == encoder.get_input_dim()
        or (emb_to_enc_proj is not None
            and emb_to_enc_proj.get_output_dim() == encoder.get_input_dim())
    )
    if not dims_match:
        raise ConfigurationError(
            "embedder output dim (or emb_to_enc_proj output dim) must match "
            "encoder input dim")
    self.feedforward = feedforward
    pre_output_dim = encoder.get_output_dim()
    if feedforward is not None:
        if feedforward.get_input_dim() != encoder.get_output_dim():
            raise ConfigurationError(
                "feedforward input dim must match encoder output dim")
        pre_output_dim = feedforward.get_output_dim()
    self.hidden2tag = torch.nn.Linear(in_features=pre_output_dim,
                                      out_features=num_tags)
    self.dropout = torch.nn.Dropout(dropout)
    self.accuracy = CategoricalAccuracy()
    # F1 tracked for the positive class (tag index 1).
    self.f1 = F1Measure(1)
    self.use_crf = use_crf
    if use_crf:
        self.crf = ConditionalRandomField(
            num_tags, include_start_end_transitions=True)
def __init__(
    self,
    highway_encoder: Seq2SeqEncoder,
    transform_gate_encoder: Seq2SeqEncoder,
    carry_gate_encoder: Optional[Seq2SeqEncoder] = None,
    projection: bool = True,
) -> None:
    """Combine a highway encoder with transform (and optionally carry) gate
    encoders that all share the same input dimension.

    With ``projection`` each encoder gets a time-distributed linear layer
    mapping its output back to its input size; without it, each encoder's
    output dim must already be the shared input dim (or 1, for a scalar gate).
    """
    check_dimensions_match(
        highway_encoder.get_input_dim(),
        transform_gate_encoder.get_input_dim(),
        "highway_encoder input dim",
        "transform_gate_encoder input dim",
    )
    stateful = highway_encoder.stateful or transform_gate_encoder.stateful
    if carry_gate_encoder is not None:
        check_dimensions_match(
            highway_encoder.get_input_dim(),
            carry_gate_encoder.get_input_dim(),
            "highway_encoder input dim",
            "carry_gate_encoder input dim",
        )
        stateful = stateful or carry_gate_encoder.stateful
    super().__init__(stateful=stateful)
    self._input_dim = highway_encoder.get_input_dim()
    self._highway_encoder = highway_encoder
    self._transform_gate_encoder = transform_gate_encoder
    self._carry_gate_encoder = carry_gate_encoder
    self._highway_projection: Optional[torch.nn.Module] = None
    self._transform_gate_projection: Optional[torch.nn.Module] = None
    self._carry_gate_projection: Optional[torch.nn.Module] = None
    if projection:
        def _out_to_in(enc: Seq2SeqEncoder) -> torch.nn.Module:
            # Map the encoder's output dim back to its input dim, per timestep.
            return TimeDistributed(  # type: ignore
                torch.nn.Linear(enc.get_output_dim(), enc.get_input_dim()))

        self._highway_projection = _out_to_in(highway_encoder)
        self._transform_gate_projection = _out_to_in(transform_gate_encoder)
        if carry_gate_encoder is not None:
            self._carry_gate_projection = _out_to_in(carry_gate_encoder)
    else:
        assert highway_encoder.get_output_dim() in (self._input_dim, 1)
        assert transform_gate_encoder.get_output_dim() in (self._input_dim, 1)
        if carry_gate_encoder is not None:
            assert carry_gate_encoder.get_output_dim() in (self._input_dim, 1)
def __init__(self, word_embeddings: TextFieldEmbedder, encoder: Seq2SeqEncoder, vocab: Vocabulary) -> None:
    """Store the embedder/encoder, create per-label average vectors
    (initialised to -inf), and set up accuracy plus F-beta metrics.
    """
    super().__init__(vocab)
    self.word_embeddings = word_embeddings
    self.encoder = encoder
    self.vocab = vocab
    self.label_vocab = vocab.get_index_to_token_vocabulary(
        namespace='labels')
    # One -inf-filled vector per label; -inf marks a not-yet-computed average.
    neg_inf = float('-inf')
    template = torch.Tensor([neg_inf] * encoder.get_input_dim())
    self.class_avgs = [template.clone() for _ in self.label_vocab]
    self.accuracy = CategoricalAccuracy()
    # F-beta with beta=1.0; presumably (average=None, labels=[0, 1, 2]) —
    # verify against the FBetaMeasure signature.
    self.f_beta = FBetaMeasure(1.0, None, [0, 1, 2])
def __init__(self, word_embeddings: TextFieldEmbedder, encoder: Seq2SeqEncoder, dropout_p: float, vocab: Vocabulary) -> None:
    """Tagger: embeddings -> ReLU projection -> encoder -> ReLU bottleneck
    (half the encoder output size) -> linear tag scores.

    The tracked metric is micro-averaged F1 computed only over the "real"
    tags, i.e. excluding the 'O' and 'OR' labels.

    Fixes vs. original: ``dropout_p`` was annotated ``int`` though it is a
    dropout probability (annotation only — callers unaffected); a list
    comprehension used purely for its ``pop`` side effects is now a loop.
    """
    super().__init__(vocab)
    self.word_embeddings = word_embeddings
    # Bridge embedding size to the encoder's expected input size.
    self.embedding2input = FeedForward(
        input_dim=word_embeddings.get_output_dim(),
        num_layers=1,
        hidden_dims=encoder.get_input_dim(),
        activations=Activation.by_name('relu')(),
        dropout=dropout_p)
    self.encoder = encoder
    # Bottleneck: half the encoder output dimension.
    intermediate_dim = encoder.get_output_dim() // 2
    self.hidden2intermediate = FeedForward(
        input_dim=encoder.get_output_dim(),
        num_layers=1,
        hidden_dims=intermediate_dim,
        activations=Activation.by_name('relu')(),
        dropout=dropout_p)
    self.intermediate2tag = nn.Linear(
        in_features=intermediate_dim,
        out_features=vocab.get_vocab_size('labels'))
    # Exclude the outside tags from the F1 computation.
    label_vocab = vocab.get_token_to_index_vocabulary('labels').copy()
    for outside_tag in ('O', 'OR'):
        label_vocab.pop(outside_tag)
    labels_for_metric = list(label_vocab.values())
    self.accuracy = CustomFBetaMeasure(beta=1.0,
                                       average='micro',
                                       labels=labels_for_metric)
def __init__(
    self,
    vocab: Vocabulary,
    utterance_embedder: TextFieldEmbedder,
    utterance_embedder2: TextFieldEmbedder,
    slot_embedder: TextFieldEmbedder,
    utterance_encoder: Seq2SeqEncoder,
    utterance_encoder2: Seq2SeqEncoder,
    slot_encoder: Seq2SeqEncoder,
    matrix_attention: MatrixAttention,
    modeling_layer: Seq2SeqEncoder,
    fc_ff_layer=FeedForward,
    label_namespace: str = "labels",
    s1_label_namespace: str = "s1_labels",
    feedforward: Optional[FeedForward] = None,
    label_encoding: Optional[str] = "BIO",
    include_start_end_transitions: bool = True,
    constrain_crf_decoding: bool = True,
    calculate_span_f1: bool = True,
    dropout: Optional[float] = 0.3,
    mask_lstms: bool = True,
    verbose_metrics: bool = False,
    initializer: InitializerApplicator = InitializerApplicator(),
    top_k: int = 1,
    **kwargs,
) -> None:
    """CRF tagger with two tag heads: the main ``labels`` namespace and a
    secondary ``s1_labels`` namespace, each with its own projection and CRF,
    over utterance/slot encoders combined by matrix attention.

    BUG FIX: the s1 CRF was previously constructed with the main
    namespace's ``constraints`` while ``s1_constraints`` was computed and
    never used; it now receives ``s1_constraints``.

    Raises:
        ConfigurationError: if constrained decoding or span F1 is requested
            without a ``label_encoding``.
    """
    super().__init__(vocab, **kwargs)
    self.label_namespace = label_namespace
    self.s1_label_namespace = s1_label_namespace
    self.utterance_embedder = utterance_embedder
    self.utterance_embedder2 = utterance_embedder2
    self.slot_embedder = slot_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.s1_num_tags = self.vocab.get_vocab_size(s1_label_namespace)
    self.utterance_encoder = utterance_encoder
    self.utterance_encoder2 = utterance_encoder2
    self.slot_encoder = slot_encoder
    self._matrix_attention = matrix_attention
    self._modeling_layer = modeling_layer
    # NOTE(review): `fc_ff_layer=FeedForward` defaults to the *class* itself,
    # not an instance — almost certainly meant `fc_ff_layer: FeedForward`.
    # Kept as-is for backward compatibility; callers must pass an instance.
    self.fc_ff_layer = fc_ff_layer
    self.top_k = top_k
    self._verbose_metrics = verbose_metrics
    self._mask_lstms = mask_lstms
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self._feedforward = feedforward
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
        s1_output_dim = feedforward.get_output_dim()
    else:
        output_dim = self.fc_ff_layer.get_output_dim()
        s1_output_dim = self.utterance_encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(
        Linear(output_dim, self.num_tags))
    self.s1_tag_projection_layer = TimeDistributed(
        Linear(s1_output_dim, self.s1_num_tags))
    # `None` for these flags means "derive from whether label_encoding is set".
    if constrain_crf_decoding is None:
        constrain_crf_decoding = label_encoding is not None
    if calculate_span_f1 is None:
        calculate_span_f1 = label_encoding is not None
    self.label_encoding = label_encoding
    if constrain_crf_decoding:
        if not label_encoding:
            raise ConfigurationError(
                "constrain_crf_decoding is True, but no label_encoding was specified."
            )
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        s1_labels = self.vocab.get_index_to_token_vocabulary(
            s1_label_namespace)
        constraints = allowed_transitions(label_encoding, labels)
        s1_constraints = allowed_transitions(label_encoding, s1_labels)
    else:
        constraints = None
        s1_constraints = None
    self.include_start_end_transitions = include_start_end_transitions
    self.crf = ConditionalRandomField(
        self.num_tags,
        constraints,
        include_start_end_transitions=include_start_end_transitions)
    # s1 CRF — fixed to use the s1 namespace's own transition constraints.
    self.s1_crf = ConditionalRandomField(
        self.s1_num_tags,
        s1_constraints,
        include_start_end_transitions=include_start_end_transitions)
    self.calculate_span_f1 = calculate_span_f1
    if calculate_span_f1:
        if not label_encoding:
            raise ConfigurationError(
                "calculate_span_f1 is True, but no label_encoding was specified."
            )
        self._f1_metric = SpanBasedF1Measure(vocab,
                                             tag_namespace=label_namespace,
                                             label_encoding=label_encoding)
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "f1": self._f1_metric
        }
    check_dimensions_match(
        utterance_embedder.get_output_dim(),
        utterance_encoder.get_input_dim(),
        "utterance field embedding dim",
        "utterance encoder input dim",
    )
    if feedforward is not None:
        check_dimensions_match(
            modeling_layer.get_output_dim(),
            feedforward.get_input_dim(),
            "encoder output dim",
            "feedforward input dim",
        )
    initializer(self)
def __init__(self, encoder: Seq2SeqEncoder, metrics_dict_seq: dict, metrics_dict_reg: dict,
             vocab: Vocabulary, attention: Attention = DotProductAttention(),
             seq_weight_loss: float = 1.0, reg_weight_loss: float = 1.0,
             reg_seq_weight_loss: float = 1.0, predict_seq: bool = True,
             predict_avg_total_payoff: bool = True, batch_size: int = 10,
             linear_dim: Optional[int] = None, dropout: Optional[float] = None,
             use_last_hidden_vec: bool = False, use_transformer_encode: bool = False,
             input_dim: int = 0, use_raisha_attention: bool = False,
             raisha_num_features: int = 0, linear_layers_activation: str = 'relu',
             use_raisha_LSTM: bool = False) -> None:
    """Sequence/regression model: an encoder plus optional heads for
    per-step label prediction (``predict_seq``) and an attention-based
    average-payoff regression (``predict_avg_total_payoff``), with an
    optional attention layer building the raisha representation.

    Fixes vs. original: removed a duplicated ``self.use_raisha_LSTM``
    assignment; modernised the ``super()`` call; annotated the optional
    parameters. NOTE(review): the ``attention`` default is a single
    ``DotProductAttention()`` instance evaluated at definition time and
    shared by all models that rely on the default — confirm this is intended.
    """
    super().__init__(vocab)
    self.encoder = encoder
    self.use_raisha_LSTM = use_raisha_LSTM
    # With a transformer encoder the sequence dimension is given explicitly;
    # otherwise it comes from the encoder itself.
    if use_transformer_encode:
        encoder_output_dim = input_dim
    else:
        encoder_output_dim = encoder.get_output_dim()
    if use_raisha_attention and raisha_num_features > 0:
        # Attention layer used to create the raisha representation.
        self.raisha_attention_layer = attention
        self.raisha_attention_vector = torch.randn(
            (batch_size, raisha_num_features), requires_grad=True)
        # Linear layer: raisha num features -> saifa num features (encoder input dim).
        self.linear_after_raisha_attention_layer = LinearLayer(
            input_size=raisha_num_features,
            output_size=encoder.get_input_dim(),
            activation=linear_layers_activation)
        if torch.cuda.is_available():
            self.raisha_attention_vector = self.raisha_attention_vector.cuda()
    else:
        self.raisha_attention_layer = None
        self.raisha_attention_vector = None
    if predict_seq:  # need a hidden2tag head
        if linear_dim is not None:  # extra linear layer before hidden2tag
            self.linear_layer = LinearLayer(input_size=encoder_output_dim,
                                            output_size=linear_dim,
                                            dropout=dropout,
                                            activation=linear_layers_activation)
            hidden2tag_input_size = linear_dim
        else:
            self.linear_layer = None
            hidden2tag_input_size = encoder_output_dim
        self.hidden2tag = LinearLayer(input_size=hidden2tag_input_size,
                                      output_size=vocab.get_vocab_size('labels'),
                                      dropout=dropout,
                                      activation=linear_layers_activation)
    if predict_avg_total_payoff:  # need attention and a regression head
        self.attention = attention
        self.linear_after_attention_layer = LinearLayer(
            input_size=encoder_output_dim,
            output_size=batch_size,
            activation=linear_layers_activation)
        self.regressor = LinearLayer(input_size=batch_size, output_size=1,
                                     activation=linear_layers_activation)
        self.attention_vector = torch.randn((batch_size, encoder_output_dim),
                                            requires_grad=True)
        if torch.cuda.is_available():
            self.attention_vector = self.attention_vector.cuda()
        self.mse_loss = nn.MSELoss()
    if predict_avg_total_payoff and predict_seq:  # for avg_turn models
        self.seq_reg_mse_loss = nn.MSELoss()
    if use_last_hidden_vec:
        # NOTE(review): this re-binds self.linear_layer, clobbering the layer
        # created in the predict_seq branch above when both flags are set —
        # confirm that combination is never used, or rename one of them.
        if linear_dim is not None:  # extra linear layer before last_hidden_reg
            self.linear_layer = LinearLayer(input_size=encoder_output_dim,
                                            output_size=linear_dim,
                                            dropout=dropout)
            self.last_hidden_reg = LinearLayer(input_size=linear_dim,
                                               output_size=1, dropout=dropout)
        else:
            self.linear_layer = None
            self.last_hidden_reg = LinearLayer(input_size=encoder_output_dim,
                                               output_size=1, dropout=dropout)
    self.metrics_dict_seq = metrics_dict_seq
    self.metrics_dict_reg = metrics_dict_reg
    self.seq_predictions = defaultdict(dict)
    self.reg_predictions = pd.DataFrame()
    self._epoch = 0
    self._first_pair = None
    self.seq_weight_loss = seq_weight_loss
    self.reg_weight_loss = reg_weight_loss
    self.reg_seq_weight_loss = reg_seq_weight_loss
    self.predict_seq = predict_seq
    self.predict_avg_total_payoff = predict_avg_total_payoff
    self.use_last_hidden_vec = use_last_hidden_vec