def __init__(
    self,
    dictionary,
    embed_dim,
    hidden_dim,
    num_layers,
    bidirectional=True,
    word_delimiter="@SPACE",
):
    super().__init__()
    self.word_split = DelimiterSplit(dictionary, word_delimiter)

    self.dictionary = dictionary
    num_embeddings = len(dictionary)
    self.padding_idx = dictionary.pad()
    self.embed_chars = rnn.Embedding(
        num_embeddings=num_embeddings,
        embedding_dim=embed_dim,
        padding_idx=self.padding_idx,
        freeze_embed=False,
    )

    self.bidirectional = bidirectional
    if self.bidirectional:
        assert hidden_dim % 2 == 0, (
            "hidden_dim must be even if bidirectional "
            "(to be divided evenly between directions)"
        )
    self.lstm_encoder = rnn.LSTMSequenceEncoder.LSTM(
        embed_dim,
        hidden_dim // 2 if bidirectional else hidden_dim,
        num_layers=num_layers,
        bidirectional=bidirectional,
    )
def __init__(
    self,
    dictionary,
    num_chars,
    char_embed_dim,
    token_embed_dim,
    normalize_embed,
    char_rnn_units,
    char_rnn_layers,
    hidden_dim,
    num_layers,
    dropout_in,
    dropout_out,
    residual_level,
    bidirectional,
):
    super().__init__(dictionary)
    self.dropout_in = dropout_in

    self.embed_chars = char_encoder.CharRNNModel(
        dictionary=dictionary,
        num_chars=num_chars,
        char_embed_dim=char_embed_dim,
        char_rnn_units=char_rnn_units,
        char_rnn_layers=char_rnn_layers,
    )

    self.embed_tokens = None
    if token_embed_dim > 0:
        self.embed_tokens = rnn.Embedding(
            num_embeddings=len(dictionary),
            embedding_dim=token_embed_dim,
            padding_idx=dictionary.pad(),
            freeze_embed=False,
            normalize_embed=normalize_embed,
        )
    self.word_dim = char_rnn_units + token_embed_dim

    self.bilstm = rnn.BiLSTM(
        num_layers=num_layers,
        bidirectional=bidirectional,
        embed_dim=self.word_dim,
        hidden_dim=hidden_dim,
        dropout=dropout_out,
        residual_level=residual_level,
    )

    # Disables sorting and word-length thresholding if True
    # (enables ONNX tracing of length-sorted input with batch_size = 1).
    self.onnx_export_model = False
def __init__(
    self,
    dictionary,
    num_chars=50,
    char_embed_dim=32,
    convolutions_params="((128, 3), (128, 5))",
    nonlinear_fn_type="tanh",
    pool_type="max",
    num_highway_layers=0,
):
    super().__init__()
    self.dictionary = dictionary
    self.padding_idx = dictionary.pad()
    # NOTE: despite the string-valued default, callers are expected to pass an
    # already-evaluated sequence of (num_filters, kernel_size) pairs; the
    # encoder constructors below run literal_eval on the string form first.
    self.convolutions_params = convolutions_params
    self.num_highway_layers = num_highway_layers

    if nonlinear_fn_type == "tanh":
        nonlinear_fn = nn.Tanh
    elif nonlinear_fn_type == "relu":
        nonlinear_fn = nn.ReLU
    else:
        raise Exception("Invalid nonlinear type: {}".format(nonlinear_fn_type))

    self.pool_type = pool_type

    self.embed_chars = rnn.Embedding(
        num_embeddings=num_chars,
        embedding_dim=char_embed_dim,
        padding_idx=self.padding_idx,
        freeze_embed=False,
    )
    self.convolutions = nn.ModuleList(
        [
            nn.Sequential(
                nn.Conv1d(
                    char_embed_dim,
                    num_filters,
                    kernel_size,
                    padding=kernel_size,
                ),
                nonlinear_fn(),
            )
            for (num_filters, kernel_size) in self.convolutions_params
        ]
    )
    conv_output_dim = sum(out_dim for (out_dim, _) in self.convolutions_params)

    highway_layers = []
    for _ in range(self.num_highway_layers):
        highway_layers.append(HighwayLayer(conv_output_dim))
    self.highway_layers = nn.ModuleList(highway_layers)
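# Hypothetical usage sketch of the char-CNN constructor above. It assumes this
# constructor belongs to char_encoder.CharCNNModel (the class the encoder
# constructors below instantiate) and that `dictionary` is an existing
# fairseq-style Dictionary providing pad(). Note that convolutions_params is
# passed as an evaluated tuple rather than as its string-valued default:
from ast import literal_eval

conv_params = literal_eval("((128, 3), (128, 5))")
char_cnn = char_encoder.CharCNNModel(
    dictionary,
    num_chars=50,
    char_embed_dim=32,
    convolutions_params=conv_params,
    nonlinear_fn_type="tanh",
    pool_type="max",
    num_highway_layers=2,
)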
def _load_byte_embedding(self):
    """
    Function to load the pre-trained byte embeddings. We need to ensure that
    the embeddings account for special yoda tags as well.
    """
    char_embed_weights = self.npz_weights["char_embed"]

    num_tags = len(TAGS)
    # Row 0 and the final num_tags rows (tag embeddings) are left
    # zero-initialized; the pre-trained byte embeddings fill the rows between.
    weights = np.zeros(
        (char_embed_weights.shape[0] + num_tags + 1, char_embed_weights.shape[1]),
        dtype="float32",
    )
    weights[1:-num_tags, :] = char_embed_weights

    self.embed_chars = rnn.Embedding(
        num_embeddings=self.num_embeddings,
        embedding_dim=self.char_embed_dim,
        padding_idx=self.padding_idx,
        freeze_embed=self._finetune_pretrained_weights,
    )
    self.embed_chars.weight.data.copy_(torch.FloatTensor(weights))
def __init__(
    self, dictionary, num_chars, char_embed_dim, char_rnn_units, char_rnn_layers
):
    super().__init__()
    self.num_chars = num_chars
    self.padding_idx = dictionary.pad()
    self.embed_chars = rnn.Embedding(
        num_embeddings=num_chars,
        embedding_dim=char_embed_dim,
        padding_idx=self.padding_idx,
        freeze_embed=False,
    )

    assert (
        char_rnn_units % 2 == 0
    ), "char_rnn_units must be even (to be divided evenly between directions)"
    self.char_lstm_encoder = rnn.LSTMSequenceEncoder.LSTM(
        char_embed_dim,
        char_rnn_units // 2,
        num_layers=char_rnn_layers,
        bidirectional=True,
    )

    self.onnx_export_model = False
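# Hypothetical usage sketch of the char-RNN constructor above, mirroring the
# keyword arguments used by the hybrid word/character encoder earlier in this
# file (assumes the class is char_encoder.CharRNNModel and that `dictionary`
# is a fairseq-style Dictionary). char_rnn_units must be even, as asserted:
char_rnn = char_encoder.CharRNNModel(
    dictionary=dictionary,
    num_chars=50,
    char_embed_dim=32,
    char_rnn_units=256,  # split evenly across the two LSTM directions
    char_rnn_layers=1,
)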
def __init__(
    self,
    dictionary,
    num_chars=50,
    embed_dim=32,
    token_embed_dim=256,
    freeze_embed=False,
    char_cnn_params="[(128, 3), (128, 5)]",
    char_cnn_output_dim=256,
    char_cnn_nonlinear_fn="tanh",
    char_cnn_pool_type="max",
    char_cnn_num_highway_layers=0,
    hidden_dim=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    residual_level=None,
    bidirectional=False,
    word_dropout_params=None,
):
    super().__init__(dictionary)
    self.dictionary = dictionary
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.residual_level = residual_level
    self.hidden_dim = hidden_dim
    self.bidirectional = bidirectional

    convolutions_params = literal_eval(char_cnn_params)
    self.char_cnn_encoder = char_encoder.CharCNNModel(
        dictionary,
        num_chars,
        embed_dim,
        convolutions_params,
        char_cnn_nonlinear_fn,
        char_cnn_pool_type,
        char_cnn_num_highway_layers,
    )

    self.embed_tokens = None
    num_tokens = len(dictionary)
    self.padding_idx = dictionary.pad()
    if token_embed_dim > 0:
        self.embed_tokens = rnn.Embedding(
            num_embeddings=num_tokens,
            embedding_dim=token_embed_dim,
            padding_idx=self.padding_idx,
            freeze_embed=freeze_embed,
        )
    self.word_dim = (
        sum(out_dim for (out_dim, _) in convolutions_params) + token_embed_dim
    )

    self.layers = nn.ModuleList([])
    for layer in range(num_layers):
        is_layer_bidirectional = self.bidirectional and layer == 0
        if is_layer_bidirectional:
            assert hidden_dim % 2 == 0, (
                "encoder_hidden_dim must be even if encoder_bidirectional "
                "(to be divided evenly between directions)"
            )
        self.layers.append(
            rnn.LSTMSequenceEncoder.LSTM(
                self.word_dim if layer == 0 else hidden_dim,
                hidden_dim // 2 if is_layer_bidirectional else hidden_dim,
                num_layers=1,
                dropout=self.dropout_out,
                bidirectional=is_layer_bidirectional,
            )
        )
    self.num_layers = len(self.layers)

    self.word_dropout_module = None
    if (
        word_dropout_params
        and word_dropout_params["word_dropout_freq_threshold"] is not None
        and word_dropout_params["word_dropout_freq_threshold"] > 0
    ):
        self.word_dropout_module = word_dropout.WordDropout(
            dictionary, word_dropout_params
        )
def __init__(
    self,
    dictionary,
    num_chars,
    char_embed_dim,
    token_embed_dim,
    freeze_embed=False,
    char_rnn_units=256,
    char_rnn_layers=1,
    hidden_dim=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    residual_level=None,
    bidirectional=False,
    word_dropout_params=None,
):
    super().__init__(dictionary)
    self.dictionary = dictionary
    self.num_chars = num_chars
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.residual_level = residual_level
    self.hidden_dim = hidden_dim
    self.bidirectional = bidirectional
    num_tokens = len(dictionary)
    self.padding_idx = dictionary.pad()

    self.embed_chars = rnn.Embedding(
        num_embeddings=num_chars,
        embedding_dim=char_embed_dim,
        padding_idx=self.padding_idx,
        freeze_embed=freeze_embed,
    )
    assert (
        char_rnn_units % 2 == 0
    ), "char_rnn_units must be even (to be divided evenly between directions)"
    self.char_lstm_encoder = rnn.LSTMSequenceEncoder.LSTM(
        char_embed_dim,
        char_rnn_units // 2,
        num_layers=char_rnn_layers,
        bidirectional=True,
    )

    self.embed_tokens = None
    if token_embed_dim > 0:
        self.embed_tokens = rnn.Embedding(
            num_embeddings=num_tokens,
            embedding_dim=token_embed_dim,
            padding_idx=self.padding_idx,
            freeze_embed=freeze_embed,
        )
    self.word_dim = char_rnn_units + token_embed_dim

    self.layers = nn.ModuleList([])
    for layer in range(num_layers):
        is_layer_bidirectional = self.bidirectional and layer == 0
        if is_layer_bidirectional:
            assert hidden_dim % 2 == 0, (
                "encoder_hidden_dim must be even if encoder_bidirectional "
                "(to be divided evenly between directions)"
            )
        self.layers.append(
            rnn.LSTMSequenceEncoder.LSTM(
                self.word_dim if layer == 0 else hidden_dim,
                hidden_dim // 2 if is_layer_bidirectional else hidden_dim,
                num_layers=1,
                dropout=self.dropout_out,
                bidirectional=is_layer_bidirectional,
            )
        )
    self.num_layers = len(self.layers)

    self.word_dropout_module = None
    if (
        word_dropout_params
        and word_dropout_params["word_dropout_freq_threshold"] is not None
        and word_dropout_params["word_dropout_freq_threshold"] > 0
    ):
        self.word_dropout_module = word_dropout.WordDropout(
            dictionary, word_dropout_params
        )

    # Disables sorting and word-length thresholding if True
    # (enables ONNX tracing of length-sorted input with batch_size = 1).
    self.onnx_export_model = False
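# Hypothetical sketch of the word_dropout_params argument consumed above. Only
# the "word_dropout_freq_threshold" key is read in these constructors; any
# other key shown here is an assumption about what word_dropout.WordDropout
# expects, and the values are illustrative only:
word_dropout_params = {
    "word_dropout_freq_threshold": 5,   # must be > 0 to enable word dropout
    "word_dropout_smoothing_alpha": 1,  # assumed additional smoothing parameter
}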
def __init__(
    self,
    dictionary,
    num_chars=50,
    char_embed_dim=32,
    convolutions_params="((128, 3), (128, 5))",
    nonlinear_fn_type="tanh",
    num_highway_layers=0,
    # A value of -1 for char_cnn_output_dim implies no projection layer
    # at the output of the highway network.
    char_cnn_output_dim=-1,
    use_pretrained_weights=False,
    finetune_pretrained_weights=False,
    weights_file=None,
):
    super().__init__()
    self.dictionary = dictionary
    self.padding_idx = dictionary.pad()
    self.use_pretrained_weights = use_pretrained_weights

    self.convolutions_params = convolutions_params
    self.num_highway_layers = num_highway_layers
    self.char_embed_dim = char_embed_dim
    self.num_embeddings = num_chars
    self.char_cnn_output_dim = char_cnn_output_dim
    self.filter_dims = sum(f[0] for f in self.convolutions_params)

    # If specified, load the pretrained weights from file.
    if use_pretrained_weights:
        self._weight_file = weights_file
        self._finetune_pretrained_weights = finetune_pretrained_weights
        self._load_weights()
    else:
        if nonlinear_fn_type == "tanh":
            nonlinear_fn = nn.Tanh
        elif nonlinear_fn_type == "relu":
            nonlinear_fn = nn.ReLU
        else:
            raise Exception("Invalid nonlinear type: {}".format(nonlinear_fn_type))

        self.embed_chars = rnn.Embedding(
            num_embeddings=num_chars,
            embedding_dim=char_embed_dim,
            padding_idx=self.padding_idx,
            freeze_embed=False,
        )
        self.convolutions = nn.ModuleList(
            [
                nn.Sequential(
                    nn.Conv1d(
                        char_embed_dim,
                        num_filters,
                        kernel_size,
                        padding=kernel_size,
                    ),
                    nonlinear_fn(),
                )
                for (num_filters, kernel_size) in self.convolutions_params
            ]
        )

        highway_layers = []
        for _ in range(self.num_highway_layers):
            highway_layers.append(HighwayLayer(self.filter_dims))
        self.highway_layers = nn.ModuleList(highway_layers)

        if char_cnn_output_dim != -1:
            self.projection = nn.Linear(
                self.filter_dims, self.char_cnn_output_dim, bias=True
            )
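# Hypothetical usage sketch of the pretrained-weights path above (class name
# char_encoder.CharCNNModel and `dictionary` assumed as in the earlier
# sketches). The weights file is expected to be an .npz archive containing at
# least a "char_embed" array (see _load_byte_embedding above); the path and
# dimensions here are illustrative and must match the pretrained file:
from ast import literal_eval

pretrained_cnn = char_encoder.CharCNNModel(
    dictionary,
    num_chars=50,
    char_embed_dim=16,
    convolutions_params=literal_eval("((128, 3), (128, 5))"),
    use_pretrained_weights=True,
    finetune_pretrained_weights=False,
    weights_file="/path/to/char_cnn_weights.npz",  # hypothetical path
)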
def __init__(
    self,
    dictionary,
    num_chars=50,
    unk_only_char_encoding=False,
    embed_dim=32,
    token_embed_dim=256,
    freeze_embed=False,
    normalize_embed=False,
    char_cnn_params="[(128, 3), (128, 5)]",
    char_cnn_nonlinear_fn="tanh",
    char_cnn_pool_type="max",
    char_cnn_num_highway_layers=0,
    char_cnn_output_dim=-1,
    hidden_dim=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    residual_level=None,
    bidirectional=False,
    word_dropout_params=None,
    use_pretrained_weights=False,
    finetune_pretrained_weights=False,
    weights_file=None,
):
    super().__init__(dictionary)
    self.dropout_in = dropout_in

    convolutions_params = literal_eval(char_cnn_params)
    self.char_cnn_encoder = char_encoder.CharCNNModel(
        dictionary,
        num_chars,
        embed_dim,
        convolutions_params,
        char_cnn_nonlinear_fn,
        char_cnn_pool_type,
        char_cnn_num_highway_layers,
        char_cnn_output_dim,
        use_pretrained_weights,
        finetune_pretrained_weights,
        weights_file,
    )

    self.embed_tokens = None
    num_tokens = len(dictionary)
    self.padding_idx = dictionary.pad()
    self.unk_idx = dictionary.unk()
    if token_embed_dim > 0:
        self.embed_tokens = rnn.Embedding(
            num_embeddings=num_tokens,
            embedding_dim=token_embed_dim,
            padding_idx=self.padding_idx,
            freeze_embed=freeze_embed,
            normalize_embed=normalize_embed,
        )
    self.word_dim = (
        char_cnn_output_dim
        if char_cnn_output_dim != -1
        else sum(out_dim for (out_dim, _) in convolutions_params)
    )
    self.token_embed_dim = token_embed_dim

    self.unk_only_char_encoding = unk_only_char_encoding
    if self.unk_only_char_encoding:
        assert char_cnn_output_dim == token_embed_dim, (
            "char_cnn_output_dim (%d) must equal token_embed_dim (%d)"
            % (char_cnn_output_dim, token_embed_dim)
        )
        self.word_dim = token_embed_dim
    else:
        self.word_dim = self.word_dim + token_embed_dim

    self.bilstm = rnn.BiLSTM(
        num_layers=num_layers,
        bidirectional=bidirectional,
        embed_dim=self.word_dim,
        hidden_dim=hidden_dim,
        dropout=dropout_out,
        residual_level=residual_level,
    )

    # Variable tracker
    self.tracker = VariableTracker()
    # Initialize adversarial mode
    self.set_gradient_tracking_mode(False)
    self.set_embed_noising_mode(False)
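# Hypothetical parameter sketch for the unk-only character-encoding mode of
# the encoder above: the char-CNN output is presumably substituted for the
# token embedding only on unknown words, so the projected char-CNN dimension
# must equal token_embed_dim (enforced by the assert in the constructor).
# Values below are illustrative only:
encoder_kwargs = dict(
    unk_only_char_encoding=True,
    token_embed_dim=256,
    char_cnn_output_dim=256,  # must equal token_embed_dim in this mode
)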