def __init__(self, encoder, sample_rate, window_size, hop_size, mel_bins, fmin, fmax, classes_num):
    super().__init__()

    window = 'hann'
    center = True
    pad_mode = 'reflect'
    ref = 1.0
    amin = 1e-10
    top_db = None

    # Spectrogram extractor
    self.spectrogram_extractor = Spectrogram(n_fft=window_size, hop_length=hop_size,
                                             win_length=window_size, window=window, center=center,
                                             pad_mode=pad_mode, freeze_parameters=True)

    # Logmel feature extractor
    self.logmel_extractor = LogmelFilterBank(sr=sample_rate, n_fft=window_size, n_mels=mel_bins,
                                             fmin=fmin, fmax=fmax, ref=ref, amin=amin,
                                             top_db=top_db, freeze_parameters=True)

    # Spec augmenter
    self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
                                           freq_drop_width=8, freq_stripes_num=2)

    self.encoder = encoder_params[encoder]["init_op"]()
    self.avg_pool = AdaptiveAvgPool2d((1, 1))
    # self.max_pool = AdaptiveMaxPool2d((1, 1))
    self.dropout = Dropout(0.3)
    self.fc = Linear(encoder_params[encoder]['features'], classes_num)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             span_extractor: SpanExtractor,
             encoder: Seq2SeqEncoder,
             feedforward_layer: FeedForward = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             evalb_directory_path: str = None) -> None:
    super(SpanConstituencyParser, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.span_extractor = span_extractor
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    self.feedforward_layer = TimeDistributed(feedforward_layer) if feedforward_layer else None

    if feedforward_layer is not None:
        output_dim = feedforward_layer.get_output_dim()
    else:
        output_dim = span_extractor.get_output_dim()

    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_classes))

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    if feedforward_layer is not None:
        check_dimensions_match(encoder.get_output_dim(), feedforward_layer.get_input_dim(),
                               "stacked encoder output dim", "feedforward input dim")

    self.metrics = {label: F1Measure(index)
                    for index, label in self.vocab.get_index_to_token_vocabulary("labels").items()}

    if evalb_directory_path is not None:
        self._evalb_score = EvalbBracketingScorer(evalb_directory_path)
    else:
        self._evalb_score = None
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             stacked_encoder: Seq2SeqEncoder) -> None:
    super(SimpleTagger, self).__init__(vocab)

    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.stacked_encoder = stacked_encoder
    self.tag_projection_layer = TimeDistributed(Linear(self.stacked_encoder.get_output_dim(),
                                                       self.num_classes))

    if text_field_embedder.get_output_dim() != stacked_encoder.get_input_dim():
        raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                 "input dimension of the phrase_encoder. Found {} and {}, "
                                 "respectively.".format(text_field_embedder.get_output_dim(),
                                                        stacked_encoder.get_input_dim()))

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             include_start_end_transitions: bool = True,
             dropout: Optional[float] = None,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super().__init__(vocab)

    # add pico tags to vocab
    pico_vocab = get_pico_label_vocab()
    self.vocab.extend_from_vocab(pico_vocab)
    self.label_namespace = 'labels'
    self.num_tags = self.vocab.get_vocab_size(self.label_namespace)

    # encode text
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.dropout = torch.nn.Dropout(dropout) if dropout else None

    # crf
    output_dim = self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags))
    self.crf = ConditionalRandomField(
        self.num_tags, constraints=None,
        include_start_end_transitions=include_start_end_transitions)

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    for index, label in self.vocab.get_index_to_token_vocabulary(self.label_namespace).items():
        self.metrics['F1_' + label] = F1Measure(positive_label=index)

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             stacked_encoder: Seq2SeqEncoder,
             tasks: List[str] = None,
             task_field_embedder: TextFieldEmbedder = None,
             domain_field_embedder: TextFieldEmbedder = None,
             source_namespace: str = "tokens",
             label_namespace: str = "labels",
             is_crf: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(TaskOnlyEmbeddingTagger, self).__init__(vocab, regularizer)

    self.tasks = tasks
    self.task_to_id = {}
    for i, tsk in enumerate(tasks):
        self.task_to_id[tsk] = i
    self.source_namespace = source_namespace
    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size(label_namespace)
    self.task_field_embedder = task_field_embedder or text_field_embedder
    self.domain_field_embedder = domain_field_embedder or text_field_embedder
    self.stacked_encoder = stacked_encoder
    self.tag_projection_layer = TimeDistributed(
        Linear(self.stacked_encoder.get_output_dim() + self.task_field_embedder.get_output_dim(),
               self.num_classes))
    self.is_crf = is_crf
    if is_crf:
        self.crf = ConditionalRandomField(self.num_classes)

    check_dimensions_match(text_field_embedder.get_output_dim(), stacked_encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")

    self.metrics = {}
    self.span_metric = {}
    for tsk in self.tasks:
        self.metrics[tsk] = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.span_metric[tsk] = SpanBasedF1Measure(vocab, tag_namespace=label_namespace)

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             source_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             mmi: dict,
             beam_width: int,
             max_decoding_steps: int,
             target_namespace: str = "tokens",
             target_embedding_dim: int = None,
             attention_function: SimilarityFunction = None,
             scheduled_sampling_ratio: float = 0.0) -> None:
    super(WebQABase, self).__init__(vocab)

    self.mmi = mmi
    self.beam_width = beam_width
    self._source_embedder = source_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._target_namespace = target_namespace
    self._attention_function = attention_function
    self._scheduled_sampling_ratio = scheduled_sampling_ratio

    # We need the start symbol to provide as the input at the first timestep of decoding, and
    # end symbol as a way to indicate the end of the decoded sequence.
    self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
    self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)
    num_classes = self.vocab.get_vocab_size(self._target_namespace)

    # Decoder output dim needs to be the same as the encoder output dim since we initialize the
    # hidden state of the decoder with that of the final hidden states of the encoder. Also, if
    # we're using attention with ``DotProductSimilarity``, this is needed.
    self._decoder_output_dim = self._encoder.get_output_dim()
    target_embedding_dim = target_embedding_dim or self._source_embedder.get_output_dim()
    self._target_embedder = Embedding(num_classes, target_embedding_dim)

    if self._attention_function:
        self._decoder_attention = Attention(self._attention_function)
        # The output of attention, a weighted average over encoder outputs, will be
        # concatenated to the input vector of the decoder at each time step.
        self._decoder_input_dim = self._encoder.get_output_dim() + target_embedding_dim
    else:
        self._decoder_input_dim = target_embedding_dim

    # TODO (pradeep): Do not hardcode decoder cell type.
    self._decoder_cell = LSTMCell(self._decoder_input_dim, self._decoder_output_dim)
    self._output_projection_layer = Linear(self._decoder_output_dim, num_classes)
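# Illustrative only: a minimal sketch (not from the source) of how one decoding step could
# combine the modules declared above -- the attended encoder summary is concatenated with the
# previous target embedding before feeding the LSTMCell, matching the comment about the
# decoder input dimension. The method name and tensor arguments are hypothetical.
def _decoder_step_sketch(self, last_predictions, decoder_hidden, decoder_context,
                         encoder_outputs, source_mask):
    embedded_input = self._target_embedder(last_predictions)            # (batch, target_embedding_dim)
    if self._attention_function:
        # Attention weights over encoder outputs, then their weighted average.
        attention_weights = self._decoder_attention(decoder_hidden, encoder_outputs, source_mask)
        attended_input = (attention_weights.unsqueeze(-1) * encoder_outputs).sum(dim=1)
        decoder_input = torch.cat((attended_input, embedded_input), dim=-1)
    else:
        decoder_input = embedded_input
    decoder_hidden, decoder_context = self._decoder_cell(decoder_input,
                                                         (decoder_hidden, decoder_context))
    logits = self._output_projection_layer(decoder_hidden)              # (batch, num_classes)
    return logits, decoder_hidden, decoder_context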
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             contextualizer: Seq2SeqEncoder,
             forward_segmental_contextualizer: Seq2SeqEncoder,
             backward_segmental_contextualizer: Seq2SeqEncoder,
             label_feature_dim: int,
             softmax_projection_dim: int,
             label_namespace: str = "labels",
             dropout: float = None,
             num_samples: int = None,
             sparse_embeddings: bool = False,
             bidirectional: bool = True,
             initializer: InitializerApplicator = None) -> None:
    super().__init__(vocab=vocab,
                     text_field_embedder=text_field_embedder,
                     contextualizer=contextualizer,
                     dropout=dropout,
                     num_samples=num_samples,
                     sparse_embeddings=sparse_embeddings,
                     bidirectional=bidirectional,
                     initializer=initializer)

    self._forward_segmental_contextualizer = forward_segmental_contextualizer
    self._backward_segmental_contextualizer = backward_segmental_contextualizer

    if num_samples is not None:
        self._softmax_loss = SampledSoftmaxLoss(num_words=vocab.get_vocab_size(),
                                                embedding_dim=softmax_projection_dim,
                                                num_samples=num_samples,
                                                sparse=sparse_embeddings)
    else:
        self._softmax_loss = _SoftmaxLoss(num_words=vocab.get_vocab_size(),
                                          embedding_dim=softmax_projection_dim)

    self.num_classes = self.vocab.get_vocab_size(label_namespace)
    self.label_feature_embedding = Embedding(self.num_classes, label_feature_dim)

    self._forward_dim = contextualizer.get_output_dim() // 2 + \
        forward_segmental_contextualizer.get_output_dim() // 2 + \
        label_feature_dim
    self.projection_layer = TimeDistributed(Linear(self._forward_dim, softmax_projection_dim))
def __init__(self,
             d_model: int = 24,
             nhead: int = 4,
             nhead2: int = 1,
             num_encoder_layers: int = 1,
             num_CAAN_layers: int = 1,
             dim_feedforward: int = 2,
             dropout: float = 0.1,
             column_num: int = 22,
             threshold: int = 0,
             activation: str = "relu",
             custom_encoder: Optional[Any] = None,
             custom_CAAN: Optional[Any] = None) -> None:
    super(Transformer, self).__init__()

    self.d_model = d_model
    self.nhead = nhead
    self.threshold = threshold
    self.column_num = column_num
    self.embeddingLayer = Linear(self.column_num, self.d_model)
    self.period = 12

    # Rewards slot for Sharpe Ratio calculation
    self.rewards = []

    # Transformer Encoder
    encoder_layer = TransformerEncoderLayer(d_model, nhead)
    encoder_norm = LayerNorm(self.d_model)
    self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

    # Cross Asset Attention Network
    CAAN_layer = CAAN_Layer(d_model, nhead2, dim_feedforward, dropout, activation,
                            column_num=self.column_num)
    self.CAAN = CAAN(CAAN_layer, num_CAAN_layers)

    # Reset parameters
    self._reset_parameters()
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             dropout: Optional[float] = 0,
             label_encoding: Optional[str] = 'BIO',
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    :param vocab: ``Vocabulary``
    :param text_field_embedder: ``TextFieldEmbedder``
        Used to embed the ``question`` and ``passage`` ``TextFields`` we get as input to the model.
    :param dropout: dropout probability
    :param label_encoding: label encoding scheme (default ``BIO``)
    :param initializer: ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the model parameters.
    :param regularizer: ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """
    super(BertCrfTaggerModel, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size('labels')
    self._labels_predictor = Linear(self._text_field_embedder.get_output_dim(), self.num_tags)
    self.dropout = torch.nn.Dropout(dropout)

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    self._f1_metric = SpanBasedF1Measure(vocab, tag_namespace='labels',
                                         label_encoding=label_encoding)

    labels = self.vocab.get_index_to_token_vocabulary('labels')
    constraints = allowed_transitions(label_encoding, labels)
    self.label_to_index = self.vocab.get_token_to_index_vocabulary('labels')
    self.crf = ConditionalRandomField(self.num_tags, constraints,
                                      include_start_end_transitions=False)
    self.loss = torch.nn.CrossEntropyLoss()

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             calculate_span_f1: bool = None,
             label_encoding: Optional[str] = None,
             label_namespace: str = "labels",
             verbose_metrics: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SimpleTagger, self).__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self._verbose_metrics = verbose_metrics
    self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(),
                                                       self.num_classes))

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")

    # We keep calculate_span_f1 as a constructor argument for API consistency with
    # the CrfTagger, even though it is redundant in this class
    # (label_encoding serves the same purpose).
    if calculate_span_f1 and not label_encoding:
        raise ConfigurationError("calculate_span_f1 is True, but "
                                 "no label_encoding was specified.")

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    if calculate_span_f1 or label_encoding:
        self._f1_metric = SpanBasedF1Measure(vocab,
                                             tag_namespace=label_namespace,
                                             label_encoding=label_encoding)
    else:
        self._f1_metric = None
    initializer(self)
def __init__(self, embed_dim, num_heads, dropout=0., bias=True, add_bias_kv=False,
             add_zero_attn=False, kdim=None, vdim=None, er_len=None):
    super(MultiheadAttentionRPR, self).__init__()

    self.embed_dim = embed_dim
    self.kdim = kdim if kdim is not None else embed_dim
    self.vdim = vdim if vdim is not None else embed_dim
    self._qkv_same_embed_dim = self.kdim == embed_dim and self.vdim == embed_dim

    self.num_heads = num_heads
    self.dropout = dropout
    self.head_dim = embed_dim // num_heads
    assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"

    self.in_proj_weight = Parameter(torch.empty(3 * embed_dim, embed_dim))

    if self._qkv_same_embed_dim is False:
        self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim))
        self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim))
        self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim))

    if bias:
        self.in_proj_bias = Parameter(torch.empty(3 * embed_dim))
    else:
        self.register_parameter('in_proj_bias', None)
    self.out_proj = Linear(embed_dim, embed_dim, bias=bias)

    if add_bias_kv:
        self.bias_k = Parameter(torch.empty(1, 1, embed_dim))
        self.bias_v = Parameter(torch.empty(1, 1, embed_dim))
    else:
        self.bias_k = self.bias_v = None

    self.add_zero_attn = add_zero_attn

    # Adding RPR embedding matrix
    if er_len is not None:
        self.Er = Parameter(torch.rand((er_len, self.head_dim), dtype=torch.float32))
    else:
        self.Er = None

    self._reset_parameters()
def __init__(self, d_model, nhead, dim_feedforward=256, dropout=0, activation="relu"):
    super(TransformerEncoderLayer, self).__init__()
    self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)

    # Implementation of Feedforward model
    # self.linear1 = Linear(d_model, dim_feedforward)
    self.linear1 = LSTM(d_model, d_model * 2, 1, bidirectional=True)
    self.dropout = Dropout(dropout)
    # self.linear2 = Linear(dim_feedforward, d_model)
    self.linear2 = Linear(d_model * 2 * 2, d_model)

    self.norm1 = LayerNorm(d_model)
    self.norm2 = LayerNorm(d_model)
    self.dropout1 = Dropout(dropout)
    self.dropout2 = Dropout(dropout)

    self.activation = _get_activation_fn(activation)
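# Illustrative only: a sketch (not from the source) of how the forward pass of this layer
# might wire the modules above, following the standard post-norm TransformerEncoderLayer
# structure but with the bidirectional LSTM in place of the first linear layer. The method
# name and mask arguments are hypothetical.
def forward_sketch(self, src, src_mask=None, src_key_padding_mask=None):
    # Self-attention sub-layer with residual connection and layer norm.
    src2 = self.self_attn(src, src, src, attn_mask=src_mask,
                          key_padding_mask=src_key_padding_mask)[0]
    src = self.norm1(src + self.dropout1(src2))
    # "Feedforward" sub-layer: the BiLSTM emits 2 * (d_model * 2) features per position,
    # which linear2 projects back down to d_model before the second residual connection.
    lstm_out, _ = self.linear1(src)
    src2 = self.linear2(self.dropout(self.activation(lstm_out)))
    src = self.norm2(src + self.dropout2(src2))
    return src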
def __init__(self,
             vocab: Vocabulary,
             pivot_phrase_embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             negative_sampling: bool = True,
             num_negative_examples: int = 10) -> None:
    super().__init__(vocab, regularizer)

    self.negative_sampling = negative_sampling
    self.num_negative_examples = num_negative_examples
    self.pivot_phrase_embedder = pivot_phrase_embedder
    self.vocab_size = self.vocab.get_vocab_size("words")
    self.encoder = encoder
    self._output_projection_layer = Linear(encoder.get_output_dim(), self.vocab_size)
    self._context_words_embedder = Embedding(self.vocab_size,
                                             pivot_phrase_embedder.get_output_dim())
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             label_namespace: str = "labels",
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(),
                                                       self.num_tags))
    self.crf = ConditionalRandomField(self.num_tags)
    self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace)

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    initializer(self)
def __init__(self, d_model, nhead, hidden_size, dim_feedforward, dropout, activation="relu"):
    super(TransformerEncoderLayer, self).__init__()
    self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)

    # Implementation of improved part
    self.lstm = LSTM(d_model, hidden_size, 1, bidirectional=True)
    self.dropout = Dropout(dropout)
    self.linear = Linear(hidden_size * 2, d_model)

    self.norm1 = LayerNorm(d_model)
    self.norm2 = LayerNorm(d_model)
    self.dropout1 = Dropout(dropout)
    self.dropout2 = Dropout(dropout)

    self.activation = _get_activation_fn(activation)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             hidden_size: int = 200,
             num_layers: int = 2) -> None:
    super(SimpleTagger, self).__init__()

    self.vocab = vocab
    self.text_field_embedder = text_field_embedder
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.num_classes = self.vocab.get_vocab_size("tags")

    # TODO(Mark): support masking once utility functions are merged.
    self.stacked_encoders = LSTM(self.text_field_embedder.get_output_dim(), self.hidden_size,
                                 self.num_layers, batch_first=True)
    self.tag_projection_layer = TimeDistributed(Linear(self.hidden_size, self.num_classes))
    self.sequence_loss = torch.nn.CrossEntropyLoss()
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             stacked_encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SimpleTagger, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.stacked_encoder = stacked_encoder
    self.tag_projection_layer = TimeDistributed(Linear(self.stacked_encoder.get_output_dim(),
                                                       self.num_classes))

    check_dimensions_match(text_field_embedder.get_output_dim(), stacked_encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             label_namespace: str = "labels",
             constraint_type: str = None,
             include_start_end_transitions: bool = True,
             dropout: float = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(),
                                                       self.num_tags))

    if constraint_type is not None:
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(constraint_type, labels)
    else:
        constraints = None

    self.crf = ConditionalRandomField(
        self.num_tags, constraints,
        include_start_end_transitions=include_start_end_transitions
    )

    self.span_metric = SpanBasedF1Measure(vocab,
                                          tag_namespace=label_namespace,
                                          label_encoding=constraint_type or "BIO")

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    initializer(self)
def __init__(self, vocab_size, d_model=512, nhead=8, num_encoder_layers=6,
             num_decoder_layers=6, dim_feedforward=2048, dropout=0.1,
             custom_encoder=None, custom_decoder=None):
    super(Transformer, self).__init__()

    self.src_embed = TransformerEmbedding(vocab_size, d_model, dropout)
    self.tgt_embed = TransformerEmbedding(vocab_size, d_model, dropout)

    if custom_encoder is not None:
        self.encoder = custom_encoder
    else:
        encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout)
        encoder_norm = LayerNorm(d_model)
        self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

    if custom_decoder is not None:
        self.decoder = custom_decoder
    else:
        decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout)
        decoder_norm = LayerNorm(d_model)
        self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm)

    self.proj = Linear(d_model, vocab_size)

    self._reset_parameters()

    self.d_model = d_model
    self.nhead = nhead
def __init__(
    self,
    task: str,
    vocab: Vocabulary,
    input_dim: int,
    loss_weight: float = 1.0,
    metric: str = 'acc',
    label_encoding: Optional[str] = None,
    **kwargs,
) -> None:
    super().__init__(vocab, **kwargs)

    self.task = task
    self.vocab = vocab
    self.input_dim = input_dim
    self.loss_weight = loss_weight
    self.num_classes = self.vocab.get_vocab_size(task)
    self.tag_projection_layer = TimeDistributed(Linear(self.input_dim, self.num_classes))
    self.metrics = {
        "acc": CategoricalAccuracy(),
    }
def __init__(
    self,
    vocab: Vocabulary,
    source_embedder: TextFieldEmbedder,
    transformer: Dict,
    max_decoding_steps: int,
    target_embedders: Dict[str, TextFieldEmbedder] = None,
    loss_coefs: Dict = None,
) -> None:
    super().__init__(vocab)

    self._target_namespaces = list(filter(lambda x: x != 'span', loss_coefs.keys()))
    self._decoder_namespaces = transformer.get("num_decoder_layers", {}).keys()
    self._start_index_dict = {k: self.vocab.get_token_index(START_SYMBOL, k)
                              for k in self._decoder_namespaces}
    self._end_index_dict = {k: self.vocab.get_token_index(END_SYMBOL, k)
                            for k in self._decoder_namespaces}
    self._pad_index_dict = {k: self.vocab.get_token_index(self.vocab._padding_token, k)
                            for k in self._target_namespaces}
    self._loss_coefs = loss_coefs

    self._metrics = {}
    for tn in self._target_namespaces:
        self._metrics[f'{tn}_acc'] = SequenceAccuracy()

    self._max_decoding_steps = max_decoding_steps
    self._source_embedder = source_embedder
    self._ndim = transformer["d_model"]
    self.pos_encoder = PositionalEncoding(self._ndim, transformer["dropout"])
    self._transformer = MultiDecodersTransformer(**transformer)
    self._transformer.apply(inplace_relu)
    self._target_embedders = ModuleDict(target_embedders.items())

    output_projection_layers = {}
    for tn in self._target_namespaces:
        num_classes = self.vocab.get_vocab_size(tn)
        output_projection_layers[tn] = Linear(self._ndim, num_classes)
    self._output_projection_layers = ModuleDict(output_projection_layers.items())
def __init__(self,
             vocab: Vocabulary,
             pretrained_model: str = None,
             requires_grad: bool = True,
             layer_freeze_regexes: List[str] = None,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)

    self._pretrained_model = pretrained_model
    self._padding_value = 1  # The index of the RoBERTa padding token

    self._transformer_model = RobertaModel.from_pretrained(pretrained_model)
    self._dropout = torch.nn.Dropout(self._transformer_model.config.hidden_dropout_prob)

    for name, param in self._transformer_model.named_parameters():
        if layer_freeze_regexes and requires_grad:
            grad = not any([bool(re.search(r, name)) for r in layer_freeze_regexes])
        else:
            grad = requires_grad
        if grad:
            param.requires_grad = True
        else:
            param.requires_grad = False

    transformer_config = self._transformer_model.config
    transformer_config.num_labels = 1
    self._output_dim = self._transformer_model.config.hidden_size

    # Unifying the classification layer across all models
    self._classifier = Linear(self._output_dim, 1)
    self._classifier.weight.data.normal_(mean=0.0, std=0.02)
    self._classifier.bias.data.zero_()

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
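# Illustrative only: a self-contained sketch (not from the source) of the freezing logic
# above -- parameter names matching any pattern in `layer_freeze_regexes` end up with
# requires_grad=False while everything else stays trainable. The regex strings and the
# example parameter names below are assumptions.
import re

layer_freeze_regexes = [r"embeddings", r"encoder\.layer\.[0-3]\."]
for name in ["embeddings.word_embeddings.weight",
             "encoder.layer.2.attention.self.query.weight",
             "encoder.layer.11.output.dense.weight"]:
    frozen = any(bool(re.search(r, name)) for r in layer_freeze_regexes)
    print(name, "frozen" if frozen else "trainable")
# -> the embedding and layer-2 weights are frozen; the layer-11 weight remains trainable.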
def __init__(
    self,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    num_tags: int,
    feedforward: Optional[FeedForward] = None,
    dropout: float = 0,
):
    """
    Args:
        text_field_embedder : `TextFieldEmbedder`, required
            Used to embed the tokens `TextField` we get as input to the model.
        encoder : `Seq2SeqEncoder`
            The encoder that we will use in between embedding tokens and predicting output tags.
        num_tags : `int`
            The number of output tag classes.
        feedforward : `FeedForward`, optional, (default = `None`).
            An optional feedforward layer to apply after the encoder.
        dropout : `float`, optional, (default = `0`)
            Dropout probability.
    """
    super().__init__()

    self.num_tags = num_tags
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.feedforward = feedforward

    if feedforward is not None:
        output_dim = feedforward.get_output_dim()  # type: ignore
    else:
        output_dim = self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(  # type: ignore
        Linear(output_dim, num_tags)
    )

    if dropout:
        self.dropout: Optional[torch.nn.Module] = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
def __init__(self, vocab, text_field_embedder, encoder,
             initializer=InitializerApplicator(), regularizer=None):
    super(SimpleTagger, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size(u"labels")
    self.encoder = encoder
    self.tag_projection_layer = TimeDistributed(
        Linear(self.encoder.get_output_dim(), self.num_classes))

    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           u"text field embedding dim", u"encoder input dim")
    self.metrics = {
        u"accuracy": CategoricalAccuracy(),
        u"accuracy3": CategoricalAccuracy(top_k=3)
    }
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SequenceClassifier, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    self.projection_layer = Linear(self.encoder.get_output_dim(), self.num_classes)

    if text_field_embedder.get_output_dim() != encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the sequence encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   encoder.get_input_dim()))

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             source_embedder: TextFieldEmbedder,
             target_embedder: Embedding,
             encoder: Seq2SeqEncoder,
             max_decoding_steps: int,
             decoding_dim: int,
             feedforward_hidden_dim: int,
             num_layers: int,
             num_attention_heads: int,
             use_positional_encoding: bool = True,
             positional_encoding_max_steps: int = 5000,
             dropout_prob: float = 0.1,
             residual_dropout_prob: float = 0.2,
             attention_dropout_prob: float = 0.2,
             beam_size: int = 1,
             target_namespace: str = "tokens",
             label_smoothing_ratio: Optional[float] = None,
             initializer: Optional[InitializerApplicator] = None) -> None:
    super(SequenceTransformer, self).__init__(vocab)

    self._target_namespace = target_namespace
    self._label_smoothing_ratio = label_smoothing_ratio
    self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
    self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)
    self._token_based_metric = TokenSequenceAccuracy()

    # Beam Search
    self._max_decoding_steps = max_decoding_steps
    self._beam_search = BeamSearch(self._end_index, max_steps=max_decoding_steps,
                                   beam_size=beam_size)

    # Encoder
    self._encoder = encoder

    # Vocabulary and embedder
    self._source_embedder = source_embedder
    self._target_embedder = target_embedder

    target_vocab_size = self.vocab.get_vocab_size(self._target_namespace)
    assert target_vocab_size == self._target_embedder.num_embeddings
    target_embedding_dim = self._target_embedder.get_output_dim()

    self._decoding_dim = decoding_dim

    # Sequence Decoder Features
    self._output_projection_layer = Linear(self._decoding_dim, target_vocab_size)
    self._decoder = Decoder(
        num_layers=num_layers,
        decoding_dim=decoding_dim,
        target_embedding_dim=target_embedding_dim,
        feedforward_hidden_dim=feedforward_hidden_dim,
        num_attention_heads=num_attention_heads,
        use_positional_encoding=use_positional_encoding,
        positional_encoding_max_steps=positional_encoding_max_steps,
        dropout_prob=dropout_prob,
        residual_dropout_prob=residual_dropout_prob,
        attention_dropout_prob=attention_dropout_prob)

    # Parameter checks and cleanup
    if self._target_embedder.get_output_dim() != self._decoder.target_embedding_dim:
        raise ConfigurationError(
            "Target Embedder output_dim doesn't match decoder module's input."
        )
    if self._encoder.get_output_dim() != self._decoder.get_output_dim():
        raise ConfigurationError(
            f"Encoder output dimension {self._encoder.get_output_dim()} should be"
            f" equal to decoder dimension {self._decoder.get_output_dim()}."
        )

    if initializer:
        initializer(self)

    # Print the model
    print(self)
def __init__(self, encoder, dropout_rate=0.0):
    super().__init__()
    self.encoder = encoder_params[encoder]["base_net"]()
    self.avg_pool = AdaptiveAvgPool2d((1, 1))
    self.dropout = Dropout(dropout_rate)
    self.fc = Linear(encoder_params[encoder]["features"], 1)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    label_namespace: str = "labels",
    feedforward: Optional[FeedForward] = None,
    label_encoding: Optional[str] = None,
    include_start_end_transitions: bool = True,
    constrain_crf_decoding: bool = None,
    calculate_span_f1: bool = None,
    dropout: Optional[float] = None,
    verbose_metrics: bool = False,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
    top_k: int = 1,
) -> None:
    super().__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self.top_k = top_k
    self._verbose_metrics = verbose_metrics
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self._feedforward = feedforward

    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags))

    # if constrain_crf_decoding and calculate_span_f1 are not
    # provided, (i.e., they're None), set them to True
    # if label_encoding is provided and False if it isn't.
    if constrain_crf_decoding is None:
        constrain_crf_decoding = label_encoding is not None
    if calculate_span_f1 is None:
        calculate_span_f1 = label_encoding is not None

    self.label_encoding = label_encoding
    if constrain_crf_decoding:
        if not label_encoding:
            raise ConfigurationError("constrain_crf_decoding is True, but "
                                     "no label_encoding was specified.")
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(label_encoding, labels)
    else:
        constraints = None

    self.include_start_end_transitions = include_start_end_transitions
    self.crf = ConditionalRandomField(
        self.num_tags, constraints,
        include_start_end_transitions=include_start_end_transitions)

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3),
    }
    self.calculate_span_f1 = calculate_span_f1
    if calculate_span_f1:
        if not label_encoding:
            raise ConfigurationError("calculate_span_f1 is True, but "
                                     "no label_encoding was specified.")
        self._f1_metric = SpanBasedF1Measure(vocab,
                                             tag_namespace=label_namespace,
                                             label_encoding=label_encoding)

    check_dimensions_match(
        text_field_embedder.get_output_dim(),
        encoder.get_input_dim(),
        "text field embedding dim",
        "encoder input dim",
    )
    if feedforward is not None:
        check_dimensions_match(
            encoder.get_output_dim(),
            feedforward.get_input_dim(),
            "encoder output dim",
            "feedforward input dim",
        )
    initializer(self)
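# Illustrative only: a small demonstration (not from the source) of how `allowed_transitions`
# turns a label encoding plus the tag vocabulary into the (from_index, to_index) pairs that
# the ConditionalRandomField is permitted to use when decoding. The label set below is an
# assumption.
from allennlp.modules.conditional_random_field import allowed_transitions

labels = {0: "O", 1: "B-PER", 2: "I-PER"}
constraints = allowed_transitions("BIO", labels)
# With BIO encoding, a pair such as (0, 2) -- "O" followed by "I-PER" -- is absent from
# `constraints`, so constrained Viterbi decoding can never emit that tag bigram.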
def __init__(
    self,
    vocab: Vocabulary,
    attention: Attention,
    beam_size: int,
    max_decoding_steps: int,
    target_embedding_dim: int = 30,
    copy_token: str = "@COPY@",
    source_namespace: str = "bert",
    target_namespace: str = "target_tokens",
    tensor_based_metric: Metric = None,
    token_based_metric: Metric = None,
    initializer: InitializerApplicator = InitializerApplicator(),
) -> None:
    super().__init__(vocab)

    self._source_namespace = source_namespace
    self._target_namespace = target_namespace
    self._src_start_index = self.vocab.get_token_index(START_SYMBOL, self._source_namespace)
    self._src_end_index = self.vocab.get_token_index(END_SYMBOL, self._source_namespace)
    self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
    self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)
    self._oov_index = self.vocab.get_token_index(self.vocab._oov_token, self._target_namespace)
    self._pad_index = self.vocab.get_token_index(self.vocab._padding_token, self._target_namespace)
    self._copy_index = self.vocab.add_token_to_namespace(copy_token, self._target_namespace)

    self._tensor_based_metric = tensor_based_metric or BLEU(
        exclude_indices={self._pad_index, self._end_index, self._start_index})
    self._token_based_metric = token_based_metric

    self._target_vocab_size = self.vocab.get_vocab_size(self._target_namespace)

    # Encoding modules.
    bert_token_embedding = PretrainedBertEmbedder('bert-base-uncased', requires_grad=True)
    self._source_embedder = bert_token_embedding
    self._encoder = PassThroughEncoder(input_dim=self._source_embedder.get_output_dim())

    # Decoder output dim needs to be the same as the encoder output dim since we initialize the
    # hidden state of the decoder with the final hidden state of the encoder.
    # We arbitrarily set the decoder's input dimension to be the same as the output dimension.
    self.encoder_output_dim = self._encoder.get_output_dim()
    self.decoder_output_dim = self.encoder_output_dim
    self.decoder_input_dim = self.decoder_output_dim

    target_vocab_size = self.vocab.get_vocab_size(self._target_namespace)

    # The decoder input will be a function of the embedding of the previous predicted token,
    # an attended encoder hidden state called the "attentive read", and another
    # weighted sum of the encoder hidden state called the "selective read".
    # While the weights for the attentive read are calculated by an `Attention` module,
    # the weights for the selective read are simply the predicted probabilities
    # corresponding to each token in the source sentence that matches the target
    # token from the previous timestep.
    self._target_embedder = Embedding(target_vocab_size, target_embedding_dim)
    self._attention = attention
    self._input_projection_layer = Linear(
        target_embedding_dim + self.encoder_output_dim * 2,
        self.decoder_input_dim)

    # We then run the projected decoder input through an LSTM cell to produce
    # the next hidden state.
    self._decoder_cell = LSTMCell(self.decoder_input_dim, self.decoder_output_dim)

    # We create a "generation" score for each token in the target vocab
    # with a linear projection of the decoder hidden state.
    self._output_generation_layer = Linear(self.decoder_output_dim, target_vocab_size)

    # We create a "copying" score for each source token by applying a non-linearity
    # (tanh) to a linear projection of the encoded hidden state for that token,
    # and then taking the dot product of the result with the decoder hidden state.
    self._output_copying_layer = Linear(self.encoder_output_dim, self.decoder_output_dim)

    # At prediction time, we'll use a beam search to find the best target sequence.
    self._beam_search = BeamSearch(self._end_index, max_steps=max_decoding_steps,
                                   beam_size=beam_size)

    initializer(self)
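# Illustrative only: a sketch (not from the source) of how the generation and copying scores
# described in the comments above could be computed from a decoder hidden state. The method
# name and tensor arguments are hypothetical; shapes follow the modules declared in the
# constructor.
def _get_scores_sketch(self, decoder_hidden, encoder_outputs):
    # Generation scores: one logit per target-vocabulary token.
    generation_scores = self._output_generation_layer(decoder_hidden)           # (batch, target_vocab_size)
    # Copying scores: project each encoded source token, apply tanh, then take the
    # dot product with the decoder hidden state.
    copy_projection = torch.tanh(self._output_copying_layer(encoder_outputs))   # (batch, source_len, decoder_output_dim)
    copy_scores = copy_projection.bmm(decoder_hidden.unsqueeze(-1)).squeeze(-1) # (batch, source_len)
    return generation_scores, copy_scores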
def __init__(
    self,
    vocab: Vocabulary,
    input_dim: int,
    decoder_hidden_size: int,
    max_decoding_steps: int,
    output_proj_input_dim: int,
    target_namespace: str = "targets",
    target_embedding_dim: int = None,
    attention: str = "none",
    dropout: float = 0.0,
    scheduled_sampling_ratio: float = 0.0,
) -> None:
    super(Seq2SeqDecoder, self).__init__(vocab)

    self._max_decoding_steps = max_decoding_steps
    self._target_namespace = target_namespace

    # We need the start symbol to provide as the input at the first timestep of decoding, and
    # end symbol as a way to indicate the end of the decoded sequence.
    self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
    self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)
    self._unk_index = self.vocab.get_token_index("@@UNKNOWN@@", self._target_namespace)
    num_classes = self.vocab.get_vocab_size(self._target_namespace)

    # Decoder output dim needs to be the same as the encoder output dim since we initialize the
    # hidden state of the decoder with that of the final hidden states of the encoder. Also, if
    # we're using attention with ``DotProductSimilarity``, this is needed.
    self._encoder_output_dim = input_dim
    self._decoder_hidden_dim = decoder_hidden_size
    if self._encoder_output_dim != self._decoder_hidden_dim:
        self._projection_encoder_out = Linear(self._encoder_output_dim, self._decoder_hidden_dim)
    else:
        self._projection_encoder_out = lambda x: x
    self._decoder_output_dim = self._decoder_hidden_dim
    self._output_proj_input_dim = output_proj_input_dim
    self._target_embedding_dim = target_embedding_dim
    self._target_embedder = Embedding(num_classes, self._target_embedding_dim)

    # Used to get an initial hidden state from the encoder states
    self._sent_pooler = Pooler(project=True, d_inp=input_dim, d_proj=decoder_hidden_size)

    if attention == "Bahdanau":
        self._decoder_attention = BahdanauAttention(decoder_hidden_size + target_embedding_dim,
                                                    input_dim)
        # The output of attention, a weighted average over encoder outputs, will be
        # concatenated to the input vector of the decoder at each time step.
        self._decoder_input_dim = input_dim + target_embedding_dim
    elif attention == "bilinear":
        self._decoder_attention = BilinearAttention(decoder_hidden_size + target_embedding_dim,
                                                    input_dim)
        # The output of attention, a weighted average over encoder outputs, will be
        # concatenated to the input vector of the decoder at each time step.
        self._decoder_input_dim = input_dim + target_embedding_dim
    elif attention == "none":
        self._decoder_attention = None
        self._decoder_input_dim = target_embedding_dim
    else:
        raise Exception("attention not implemented {}".format(attention))

    self._decoder_cell = LSTMCell(self._decoder_input_dim, self._decoder_hidden_dim)

    # Allow for a bottleneck layer between encoder outputs and distribution over vocab.
    # The bottleneck layer consists of a linear transform and helps to reduce
    # the number of parameters.
    if self._output_proj_input_dim != self._decoder_output_dim:
        self._projection_bottleneck = Linear(self._decoder_output_dim,
                                             self._output_proj_input_dim)
    else:
        self._projection_bottleneck = lambda x: x
    self._output_projection_layer = Linear(self._output_proj_input_dim, num_classes)
    self._dropout = torch.nn.Dropout(p=dropout)
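# Illustrative only: a minimal sketch (not from the source) of how the optional bottleneck
# declared above reduces parameters -- the decoder state is first projected down to
# `output_proj_input_dim` before the much larger vocabulary projection is applied. The
# method name and argument are hypothetical.
def _project_to_vocab_sketch(self, decoder_hidden):
    bottlenecked = self._projection_bottleneck(decoder_hidden)   # (batch, output_proj_input_dim)
    logits = self._output_projection_layer(bottlenecked)         # (batch, num_classes)
    return logits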